triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import time
|
|
18
|
+
from abc import ABC, abstractmethod
|
|
19
|
+
from multiprocessing.pool import ThreadPool
|
|
20
|
+
|
|
21
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Monitor(ABC):
    """
    Abstract base class for device monitors.

    Runs a polling loop on a single background worker thread and lets
    concrete subclasses define what one polling iteration does and how
    the accumulated results are turned into Records.
    """

    def __init__(self, frequency, metrics):
        """
        Parameters
        ----------
        frequency : float
            Polling period in seconds between monitoring iterations.
        metrics : list
            Record types that this monitor should collect.

        Raises
        ------
        TritonModelAnalyzerException
        """

        self._frequency = frequency

        # Flag read by the background loop; flipping it to False stops polling
        self._thread_active = False

        # AsyncResult handle for the background polling task
        self._thread = None

        # Single-worker pool that hosts the polling loop
        self._thread_pool = ThreadPool(processes=1)
        self._metrics = metrics

    def _monitoring_loop(self):
        # Hoist the attribute lookup out of the loop
        period = self._frequency

        while self._thread_active:
            iteration_start = time.time()
            # One round of monitoring, provided by the subclass
            self._monitoring_iteration()

            # Sleep off whatever is left of the polling period
            remaining = period - (time.time() - iteration_start)
            if remaining > 0:
                time.sleep(remaining)

    @abstractmethod
    def _monitoring_iteration(self):
        """
        Execute a single round of monitoring.

        Every subclass must implement this.
        """

        pass

    @abstractmethod
    def _collect_records(self):
        """
        Collect all the monitoring records.

        Called from stop_recording_metrics after the background
        thread has been told to stop.

        Returns
        -------
        List of Records
            The list of records collected by the monitor
        """

        pass

    @abstractmethod
    def is_monitoring_connected(self) -> bool:
        """
        Determine whether a connection to the monitor can be made.

        Returns
        -------
        bool
            True if connection to the monitor was successful
        """

        pass

    def start_recording_metrics(self):
        """
        Start the background thread that records the metrics.
        """

        self._thread_active = True
        self._thread = self._thread_pool.apply_async(self._monitoring_loop)

    def stop_recording_metrics(self):
        """
        Stop monitoring and return everything collected so far.

        Returns
        ------
        List of Records

        Raises
        ------
        TritonModelAnalyzerException
            If called without a prior start_recording_metrics call.
        """

        if not self._thread_active:
            raise TritonModelAnalyzerException(
                "start_recording_metrics should be "
                "called before stop_recording_metrics"
            )

        self._thread_active = False
        self._thread = None

        return self._collect_records()

    def destroy(self):
        """
        Cleanup threadpool resources
        """

        self._thread_pool.terminate()
        self._thread_pool.close()
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
from prometheus_client.parser import text_string_to_metric_families
|
|
21
|
+
|
|
22
|
+
from model_analyzer.constants import LOGGER_NAME
|
|
23
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
24
|
+
from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
|
|
25
|
+
from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
|
|
26
|
+
from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
|
|
27
|
+
from model_analyzer.record.types.gpu_utilization import GPUUtilization
|
|
28
|
+
|
|
29
|
+
from .monitor import Monitor
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(LOGGER_NAME)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RemoteMonitor(Monitor):
    """
    Requests metrics from Triton's metrics endpoint and converts the
    Prometheus-formatted responses into Record objects.
    """

    # Maps Triton's Prometheus metric names to Model Analyzer record types
    gpu_metrics = {
        "nv_gpu_utilization": GPUUtilization,
        "nv_gpu_memory_used_bytes": GPUUsedMemory,
        "nv_gpu_power_usage": GPUPowerUsage,
        "nv_gpu_memory_total_bytes": GPUFreeMemory,
    }

    def __init__(self, metrics_url, frequency, metrics):
        """
        Parameters
        ----------
        metrics_url : str
            URL of the Triton metrics endpoint to poll
        frequency : float
            Polling period, in seconds
        metrics : list
            Record types to collect; must be a subset of gpu_metrics values

        Raises
        ------
        TritonModelAnalyzerException
            If an unsupported metric type is requested
        """

        super().__init__(frequency, metrics)
        self._metrics_url = metrics_url
        self._metrics_responses = []

        allowed_metrics = set(self.gpu_metrics.values())
        if not set(metrics).issubset(allowed_metrics):
            unsupported_metrics = set(metrics) - allowed_metrics
            # Fixed: the message previously ended with a stray ']'
            raise TritonModelAnalyzerException(
                f"GPU monitoring does not currently support the following metrics: {unsupported_metrics}"
            )

    def is_monitoring_connected(self) -> bool:
        """
        Returns
        -------
        bool
            True if the metrics endpoint responded with HTTP 200
        """

        try:
            status_code = requests.get(self._metrics_url, timeout=10).status_code
        except Exception:
            # Any failure (DNS, refused connection, timeout) means no connection
            return False

        return status_code == requests.codes["okay"]

    def _monitoring_iteration(self):
        """
        When this function runs, it requests all the metrics
        that triton has collected and organizes them into
        the dict. This function should run as fast
        as possible
        """

        self._metrics_responses.append(
            str(requests.get(self._metrics_url, timeout=10).content, encoding="ascii")
        )

    def _collect_records(self):
        """
        This function will organize the metrics responses
        and create Records out of them

        Returns
        -------
        list of Records
        """

        records = []

        for response in self._metrics_responses:
            metrics = text_string_to_metric_families(response)
            processed_gpu_used_memory = False
            calculate_free_memory_after_pass = False
            gpu_memory_used_bytes = None
            for metric in metrics:
                if (
                    metric.name in self.gpu_metrics
                    and self.gpu_metrics[metric.name] in self._metrics
                ):
                    for sample in metric.samples:
                        if sample.name == "nv_gpu_memory_used_bytes":
                            processed_gpu_used_memory = True
                            gpu_memory_used_bytes = sample.value
                            # bytes -> MB (// keeps the float type)
                            self._create_and_add_record(
                                records, sample, gpu_memory_used_bytes // 1.0e6
                            )
                        elif sample.name == "nv_gpu_memory_total_bytes":
                            # Free memory = total - used. If used memory hasn't
                            # been seen yet, defer the subtraction to a second
                            # pass after the full response has been scanned.
                            if processed_gpu_used_memory:
                                self._create_and_add_record(
                                    records,
                                    sample,
                                    (sample.value - gpu_memory_used_bytes) // 1.0e6,
                                )
                            else:
                                total_memory_metric = metric
                                calculate_free_memory_after_pass = True
                        elif sample.name == "nv_gpu_utilization":
                            # Triton reports utilization as a fraction; scale
                            # to a percentage
                            self._create_and_add_record(
                                records, sample, sample.value * 100
                            )
                        else:
                            self._create_and_add_record(records, sample, sample.value)
            if calculate_free_memory_after_pass:
                # NOTE(review): gpu_memory_used_bytes can still be None here if
                # GPUUsedMemory was not among the requested metrics, which would
                # raise a TypeError below -- confirm callers always request used
                # memory together with free memory.
                for sample in total_memory_metric.samples:
                    self._create_and_add_record(
                        records, sample, (sample.value - gpu_memory_used_bytes) // 1.0e6
                    )

        return records

    def _create_and_add_record(self, records, sample, sample_value):
        """
        Appends a new Record for the given sample to the records list.
        (Docstring fixed: `records` is a list, not a dict.)
        """

        records.append(
            self.gpu_metrics[sample.name](
                value=sample_value, device_uuid=sample.labels["gpu_uuid"]
            )
        )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
18
|
+
|
|
19
|
+
from .output_writer import OutputWriter
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FileWriter(OutputWriter):
    """
    Writes table output to a file or to stdout.
    """

    def __init__(self, filename=None):
        """
        Parameters
        ----------
        filename : str or None
            The full path to the file to write the output to.
            Writes to stdout if filename is None.
        """

        self._filename = filename

    def write(self, out, append=False):
        """
        Writes the output to a file or stdout

        Parameters
        ----------
        out : str
            The string to be written to the
            file or stdout
        append : bool
            If True, append to the file instead of overwriting it.
            Defaults to False. (Fixed: this parameter was previously
            undocumented.)

        Raises
        ------
        TritonModelAnalyzerException
            If there is an error or exception while writing
            the output.
        """

        write_mode = "a+" if append else "w+"
        if self._filename:
            try:
                with open(self._filename, write_mode) as f:
                    f.write(out)
            except OSError as e:
                # Chain the OSError so the traceback preserves the
                # underlying cause (e.g. permission denied, missing dir)
                raise TritonModelAnalyzerException(e) from e
        else:
            print(out, end="")
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class OutputWriter(ABC):
    """
    Interface that receives a table
    and writes the table to a file or stream.
    """

    @abstractmethod
    def write(self, out):
        """
        Writes the output to a file
        (stdout, .txt, .csv etc.)

        Parameters
        ----------
        out : str
            The string to be written out

        Raises
        ------
        TritonModelAnalyzerException
            If there is an error or exception while writing
            the output.
        """
        # NOTE(review): concrete implementations (e.g. FileWriter) extend
        # this signature with extra keyword arguments such as `append` --
        # callers holding an OutputWriter should rely only on `out`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GenaiPerfConfig:
    """
    A config class to set arguments to the genai_perf.
    An argument set to None will use the genai_perf's default.
    """

    # All CLI arguments that model analyzer may pass to genai_perf
    genai_perf_args = [
        "backend",
        "endpoint",
        "service-kind",
        "url",
        "expected-output-tokens",
        "input-dataset",
        "input-tokens-mean",
        "input-tokens-stddev",
        "input-type",
        "num-of-output-prompts",
        "random-seed",
        "streaming",
        "tokenizer",
    ]

    # Flags emitted without a value (e.g. --streaming)
    boolean_args = ["streaming"]

    def __init__(self):
        """
        Construct a GenaiPerfConfig with every argument unset (None).
        """

        self._args = {k: None for k in self.genai_perf_args}

    @classmethod
    def allowed_keys(cls):
        """
        Returns
        -------
        list of str
            The keys that are allowed to be
            passed into genai_perf
            (Docstring fixed: previously said perf_analyzer.)
        """

        return cls.genai_perf_args

    def update_config(self, params=None):
        """
        Allows setting values from a params dict

        Parameters
        ----------
        params: dict
            keys are allowed args to genai_perf

        Raises
        ------
        TritonModelAnalyzerException
            If any key is not a supported genai_perf argument
        """

        # Fixed: `type(params) is dict` silently ignored dict subclasses
        # (e.g. OrderedDict); isinstance accepts them as well.
        if params and isinstance(params, dict):
            for key in params:
                self[key] = params[key]

    @classmethod
    def from_dict(cls, genai_perf_config_dict):
        """
        Reconstructs a GenaiPerfConfig from a dict (e.g. a checkpoint).

        Parameters
        ----------
        genai_perf_config_dict : dict
            May contain an '_args' entry to restore

        Returns
        -------
        GenaiPerfConfig
        """

        genai_perf_config = GenaiPerfConfig()
        # Only the private '_args' attribute is restored
        for key in [
            "_args",
        ]:
            if key in genai_perf_config_dict:
                setattr(genai_perf_config, key, genai_perf_config_dict[key])
        return genai_perf_config

    def representation(self):
        """
        Returns
        -------
        str
            a string representation of the Genai Perf config
            that removes values which can vary between
            runs, but should be ignored when determining
            if a previous (checkpointed) run can be used
        """

        cli_string = self.to_cli_string()

        return cli_string

    def to_cli_string(self) -> str:
        """
        Utility function to convert this config into a
        string of CLI arguments to genai_perf.

        Returns
        -------
        str
            cli command string consisting of all arguments
            set in the config, without the executable name.
        """

        # single dashed options, then verbose flags, then main args
        args = []
        args.extend(self._parse_options())

        return " ".join(args)

    def _parse_options(self):
        """
        Parse the genai perf args
        """

        temp_args = []
        for key, value in self._args.items():
            if key in self.boolean_args:
                temp_args = self._parse_boolean_args(key, value, temp_args)
            elif value:
                # NOTE: falsy values (0, "") are intentionally skipped,
                # matching genai_perf default behavior
                temp_args.append(f"--{key}={value}")
        return temp_args

    def _parse_boolean_args(self, key, value, temp_args):
        """
        Parse genai perf args that should not add a value to the cli string
        """

        # NOTE(review): assert is stripped under `python -O`; validation then
        # falls through to the AttributeError below -- confirm acceptable.
        assert type(value) in [
            str,
            type(None),
        ], f"Data type for arg {key} must be a (boolean) string instead of {type(value)}"
        # Fixed idiom: compare to None with `is not`, not `!=`
        if value is not None and value.lower() == "true":
            temp_args.append(f"--{key}")
        return temp_args

    def __getitem__(self, key):
        """
        Gets an arguments value in config

        Parameters
        ----------
        key : str
            The name of the argument to the genai perf config

        Returns
        -------
        object
            The value that the argument is set to in this config

        Raises
        ------
        TritonModelAnalyzerException
            If argument not found in the config
        """

        if key in self._args:
            return self._args[key]
        else:
            raise TritonModelAnalyzerException(
                f"Key {key} does not exist in genai_perf_flags."
            )

    def __setitem__(self, key, value):
        """
        Sets an arguments value in config
        after checking if defined/supported.

        Parameters
        ----------
        key : str
            The name of the argument in genai_perf
        value : (any)
            The value to which the argument is being set

        Raises
        ------
        TritonModelAnalyzerException
            If key is unsupported or undefined in the
            config class
        """

        if key in self._args:
            self._args[key] = value
        else:
            raise TritonModelAnalyzerException(
                f"The argument '{key}' to the genai_perf "
                "is not supported by model analyzer."
            )

    def __contains__(self, key):
        """
        Returns
        -------
        True if key is in perf_config i.e. the key is a
        genai perf config argument
        """

        return key in GenaiPerfConfig.allowed_keys()
|