triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0

model_analyzer/record/types/cpu_used_ram.py
@@ -0,0 +1,93 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.record import DecreasingRecord


@total_ordering
class CPUUsedRAM(DecreasingRecord):
    """
    The CPU memory usage record
    """

    tag = "cpu_used_ram"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            CPU used memory
        timestamp : int
            The timestamp for the record in nanoseconds
        """

        super().__init__(value, timestamp)

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Max " if aggregation_tag else "") + "RAM Usage (MB)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is better than
        the other
        """

        return self.value() > other.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return CPUUsedRAM(value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return CPUUsedRAM(value=(other.value() - self.value()))
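Because CPUUsedRAM is a DecreasingRecord, its __lt__ is deliberately inverted so that sorting and max/min treat lower RAM usage as the better measurement. The following minimal sketch is illustrative only and is not part of the package diff; it assumes triton-model-analyzer is installed and relies on the value() accessor inherited from the Record base class (record.py in the listing above).

```python
# Illustrative only (not from the package). Assumes triton-model-analyzer
# is installed; value() comes from the Record base class shown in the manifest.
from model_analyzer.record.types.cpu_used_ram import CPUUsedRAM

low = CPUUsedRAM(value=1024.0)   # 1 GB of RAM in use
high = CPUUsedRAM(value=4096.0)  # 4 GB of RAM in use

# DecreasingRecord semantics: the record with MORE used RAM compares as
# "less than", so max() picks the measurement with the lowest usage.
assert high < low
assert max(low, high) is low

# __add__ returns a brand-new record holding the summed values.
assert (low + high).value() == 5120.0

# header() supplies the report column label, with an aggregation prefix.
print(CPUUsedRAM.header(aggregation_tag=True))  # "Max RAM Usage (MB)"
```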

model_analyzer/record/types/gpu_free_memory.py
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.gpu_record import IncreasingGPURecord


@total_ordering
class GPUFreeMemory(IncreasingGPURecord):
    """
    The free memory in the GPU.
    """

    tag = "gpu_free_memory"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The value of the GPU metrtic
        device_uuid : str
            The GPU device uuid this metric is associated
            with.
        timestamp : int
            The timestamp for the record in nanoseconds
        """

        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is less than
        the other
        """

        return self.value() < other.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return GPUFreeMemory(device_uuid=None, value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return GPUFreeMemory(device_uuid=None, value=(self.value() - other.value()))

model_analyzer/record/types/gpu_power_usage.py
@@ -0,0 +1,107 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.gpu_record import DecreasingGPURecord


@total_ordering
class GPUPowerUsage(DecreasingGPURecord):
    """
    GPU Power Usage
    """

    tag = "gpu_power_usage"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value : float-compatible value
            The value of the GPU metrtic
        device_uuid : str
            The GPU device uuid this metric is associated
            with.
        timestamp : int
            The timestamp for the record in nanoseconds
        """
        value = float(value)
        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def aggregation_function():
        """
        The function that is used to aggregate
        this type of record
        """

        def average(seq):
            return sum(seq[1:], start=seq[0]) / len(seq)

        return average

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed as part of the header
            indicating that this record has been aggregated using max, min or
            average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Average " if aggregation_tag else "") + "GPU Power Usage (W)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is less than
        the other
        """

        return other.value() < self.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return GPUPowerUsage(device_uuid=None, value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return GPUPowerUsage(device_uuid=None, value=(other.value() - self.value()))
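The aggregation_function above is what the metrics pipeline uses to collapse a series of power samples into one reading: it sums the records with __add__ and divides by the sample count, relying on arithmetic defined on the record base classes (record.py and gpu_record.py in the manifest, not shown here). The following minimal sketch is illustrative only, assumes triton-model-analyzer is installed, and uses a made-up device UUID.

```python
# Illustrative only (not from the package). Assumes triton-model-analyzer is
# installed; "GPU-example-uuid" is a placeholder, not a real device UUID.
from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage

samples = [
    GPUPowerUsage(value=210.0, device_uuid="GPU-example-uuid"),
    GPUPowerUsage(value=250.0, device_uuid="GPU-example-uuid"),
    GPUPowerUsage(value=230.0, device_uuid="GPU-example-uuid"),
]

# aggregation_function() hands back the `average` helper defined above, which
# the pipeline applies to a list of records. Numerically that average is:
average = GPUPowerUsage.aggregation_function()
mean_watts = sum(s.value() for s in samples) / len(samples)
assert mean_watts == 230.0

# header(aggregation_tag=True) provides the matching report column label.
print(GPUPowerUsage.header(aggregation_tag=True))  # "Average GPU Power Usage (W)"
```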

model_analyzer/record/types/gpu_total_memory.py
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.gpu_record import IncreasingGPURecord


@total_ordering
class GPUTotalMemory(IncreasingGPURecord):
    """
    The total memory in the GPU.
    """

    tag = "gpu_total_memory"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The value of the GPU metrtic
        device_uuid : str
            The GPU device uuid this metric is associated
            with.
        timestamp : int
            The timestamp for the record in nanoseconds
        """

        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is less than
        the other
        """

        return self.value() < other.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return GPUTotalMemory(device_uuid=None, value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return GPUTotalMemory(device_uuid=None, value=(self.value() - other.value()))

model_analyzer/record/types/gpu_used_memory.py
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.gpu_record import DecreasingGPURecord


@total_ordering
class GPUUsedMemory(DecreasingGPURecord):
    """
    The used memory in the GPU.
    """

    tag = "gpu_used_memory"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The value of the GPU metrtic
        device_uuid : str
            The GPU device uuid this metric is associated
            with.
        timestamp : int
            The timestamp for the record in nanoseconds
        """

        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Max " if aggregation_tag else "") + "GPU Memory Usage (MB)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is less than
        the other
        """

        return self.value() > other.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return GPUUsedMemory(device_uuid=None, value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return GPUUsedMemory(device_uuid=None, value=(other.value() - self.value()))
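GPUFreeMemory above is an IncreasingGPURecord (more free memory is better) while GPUUsedMemory is a DecreasingGPURecord (less used memory is better), so their __lt__ implementations point in opposite directions. A minimal sketch of the contrast follows; it is illustrative only and assumes triton-model-analyzer is installed.

```python
# Illustrative only (not from the package). Assumes triton-model-analyzer
# is installed; device_uuid is left at its default of None for brevity.
from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory

# IncreasingGPURecord: natural ordering, since more free memory is better.
assert GPUFreeMemory(value=2000.0) < GPUFreeMemory(value=8000.0)

# DecreasingGPURecord: inverted ordering, so the record with MORE used
# memory compares as "less than" and loses comparisons on purpose.
assert GPUUsedMemory(value=8000.0) < GPUUsedMemory(value=2000.0)

# Both report in MB, with a "Max" prefix when aggregated.
print(GPUUsedMemory.header(aggregation_tag=True))  # "Max GPU Memory Usage (MB)"
```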

model_analyzer/record/types/gpu_utilization.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.gpu_record import IncreasingGPURecord


@total_ordering
class GPUUtilization(IncreasingGPURecord):
    """
    GPU utilization record
    """

    tag = "gpu_utilization"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The value of the GPU metrtic
        device_uuid : str
            The GPU device uuid this metric is associated
            with.
        timestamp : int
            The timestamp for the record in nanoseconds
        """

        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def aggregation_function():
        """
        The function that is used to aggregate
        this type of record
        """

        def average(seq):
            return sum(seq[1:], start=seq[0]) / len(seq)

        return average

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return ("Average " if aggregation_tag else "") + "GPU Utilization (%)"

    def __eq__(self, other):
        """
        Allows checking for
        equality between two records
        """

        return self.value() == other.value()

    def __lt__(self, other):
        """
        Allows checking if
        this record is less than
        the other
        """

        return self.value() < other.value()

    def __add__(self, other):
        """
        Allows adding two records together
        to produce a brand new record.
        """

        return GPUUtilization(device_uuid=None, value=(self.value() + other.value()))

    def __sub__(self, other):
        """
        Allows subtracting two records together
        to produce a brand new record.
        """

        return GPUUtilization(device_uuid=None, value=(self.value() - other.value()))

model_analyzer/record/types/inter_token_latency_avg.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyAvg(InterTokenLatencyBase):
    """
    A record for perf_analyzer Inter token latency metric
    """

    tag = "inter_token_latency_avg"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return "Avg Inter Token Latency (ms)"