triton-model-analyzer 1.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_analyzer/__init__.py +15 -0
- model_analyzer/analyzer.py +448 -0
- model_analyzer/cli/__init__.py +15 -0
- model_analyzer/cli/cli.py +193 -0
- model_analyzer/config/__init__.py +15 -0
- model_analyzer/config/generate/__init__.py +15 -0
- model_analyzer/config/generate/automatic_model_config_generator.py +164 -0
- model_analyzer/config/generate/base_model_config_generator.py +352 -0
- model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py +164 -0
- model_analyzer/config/generate/brute_run_config_generator.py +154 -0
- model_analyzer/config/generate/concurrency_sweeper.py +75 -0
- model_analyzer/config/generate/config_generator_interface.py +52 -0
- model_analyzer/config/generate/coordinate.py +143 -0
- model_analyzer/config/generate/coordinate_data.py +86 -0
- model_analyzer/config/generate/generator_utils.py +116 -0
- model_analyzer/config/generate/manual_model_config_generator.py +187 -0
- model_analyzer/config/generate/model_config_generator_factory.py +92 -0
- model_analyzer/config/generate/model_profile_spec.py +74 -0
- model_analyzer/config/generate/model_run_config_generator.py +154 -0
- model_analyzer/config/generate/model_variant_name_manager.py +150 -0
- model_analyzer/config/generate/neighborhood.py +536 -0
- model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py +141 -0
- model_analyzer/config/generate/optuna_run_config_generator.py +838 -0
- model_analyzer/config/generate/perf_analyzer_config_generator.py +312 -0
- model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py +130 -0
- model_analyzer/config/generate/quick_run_config_generator.py +753 -0
- model_analyzer/config/generate/run_config_generator_factory.py +329 -0
- model_analyzer/config/generate/search_config.py +112 -0
- model_analyzer/config/generate/search_dimension.py +73 -0
- model_analyzer/config/generate/search_dimensions.py +85 -0
- model_analyzer/config/generate/search_parameter.py +49 -0
- model_analyzer/config/generate/search_parameters.py +388 -0
- model_analyzer/config/input/__init__.py +15 -0
- model_analyzer/config/input/config_command.py +483 -0
- model_analyzer/config/input/config_command_profile.py +1747 -0
- model_analyzer/config/input/config_command_report.py +267 -0
- model_analyzer/config/input/config_defaults.py +236 -0
- model_analyzer/config/input/config_enum.py +83 -0
- model_analyzer/config/input/config_field.py +216 -0
- model_analyzer/config/input/config_list_generic.py +112 -0
- model_analyzer/config/input/config_list_numeric.py +151 -0
- model_analyzer/config/input/config_list_string.py +111 -0
- model_analyzer/config/input/config_none.py +71 -0
- model_analyzer/config/input/config_object.py +129 -0
- model_analyzer/config/input/config_primitive.py +81 -0
- model_analyzer/config/input/config_status.py +75 -0
- model_analyzer/config/input/config_sweep.py +83 -0
- model_analyzer/config/input/config_union.py +113 -0
- model_analyzer/config/input/config_utils.py +128 -0
- model_analyzer/config/input/config_value.py +243 -0
- model_analyzer/config/input/objects/__init__.py +15 -0
- model_analyzer/config/input/objects/config_model_profile_spec.py +325 -0
- model_analyzer/config/input/objects/config_model_report_spec.py +173 -0
- model_analyzer/config/input/objects/config_plot.py +198 -0
- model_analyzer/config/input/objects/config_protobuf_utils.py +101 -0
- model_analyzer/config/input/yaml_config_validator.py +82 -0
- model_analyzer/config/run/__init__.py +15 -0
- model_analyzer/config/run/model_run_config.py +313 -0
- model_analyzer/config/run/run_config.py +168 -0
- model_analyzer/constants.py +76 -0
- model_analyzer/device/__init__.py +15 -0
- model_analyzer/device/device.py +24 -0
- model_analyzer/device/gpu_device.py +87 -0
- model_analyzer/device/gpu_device_factory.py +248 -0
- model_analyzer/entrypoint.py +307 -0
- model_analyzer/log_formatter.py +65 -0
- model_analyzer/model_analyzer_exceptions.py +24 -0
- model_analyzer/model_manager.py +255 -0
- model_analyzer/monitor/__init__.py +15 -0
- model_analyzer/monitor/cpu_monitor.py +69 -0
- model_analyzer/monitor/dcgm/DcgmDiag.py +191 -0
- model_analyzer/monitor/dcgm/DcgmFieldGroup.py +83 -0
- model_analyzer/monitor/dcgm/DcgmGroup.py +815 -0
- model_analyzer/monitor/dcgm/DcgmHandle.py +141 -0
- model_analyzer/monitor/dcgm/DcgmJsonReader.py +69 -0
- model_analyzer/monitor/dcgm/DcgmReader.py +623 -0
- model_analyzer/monitor/dcgm/DcgmStatus.py +57 -0
- model_analyzer/monitor/dcgm/DcgmSystem.py +412 -0
- model_analyzer/monitor/dcgm/__init__.py +15 -0
- model_analyzer/monitor/dcgm/common/__init__.py +13 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_cli_parser.py +194 -0
- model_analyzer/monitor/dcgm/common/dcgm_client_main.py +86 -0
- model_analyzer/monitor/dcgm/dcgm_agent.py +887 -0
- model_analyzer/monitor/dcgm/dcgm_collectd_plugin.py +369 -0
- model_analyzer/monitor/dcgm/dcgm_errors.py +395 -0
- model_analyzer/monitor/dcgm/dcgm_field_helpers.py +546 -0
- model_analyzer/monitor/dcgm/dcgm_fields.py +815 -0
- model_analyzer/monitor/dcgm/dcgm_fields_collectd.py +671 -0
- model_analyzer/monitor/dcgm/dcgm_fields_internal.py +29 -0
- model_analyzer/monitor/dcgm/dcgm_fluentd.py +45 -0
- model_analyzer/monitor/dcgm/dcgm_monitor.py +138 -0
- model_analyzer/monitor/dcgm/dcgm_prometheus.py +326 -0
- model_analyzer/monitor/dcgm/dcgm_structs.py +2357 -0
- model_analyzer/monitor/dcgm/dcgm_telegraf.py +65 -0
- model_analyzer/monitor/dcgm/dcgm_value.py +151 -0
- model_analyzer/monitor/dcgm/dcgmvalue.py +155 -0
- model_analyzer/monitor/dcgm/denylist_recommendations.py +573 -0
- model_analyzer/monitor/dcgm/pydcgm.py +47 -0
- model_analyzer/monitor/monitor.py +143 -0
- model_analyzer/monitor/remote_monitor.py +137 -0
- model_analyzer/output/__init__.py +15 -0
- model_analyzer/output/file_writer.py +63 -0
- model_analyzer/output/output_writer.py +42 -0
- model_analyzer/perf_analyzer/__init__.py +15 -0
- model_analyzer/perf_analyzer/genai_perf_config.py +206 -0
- model_analyzer/perf_analyzer/perf_analyzer.py +882 -0
- model_analyzer/perf_analyzer/perf_config.py +479 -0
- model_analyzer/plots/__init__.py +15 -0
- model_analyzer/plots/detailed_plot.py +266 -0
- model_analyzer/plots/plot_manager.py +224 -0
- model_analyzer/plots/simple_plot.py +213 -0
- model_analyzer/record/__init__.py +15 -0
- model_analyzer/record/gpu_record.py +68 -0
- model_analyzer/record/metrics_manager.py +887 -0
- model_analyzer/record/record.py +280 -0
- model_analyzer/record/record_aggregator.py +256 -0
- model_analyzer/record/types/__init__.py +15 -0
- model_analyzer/record/types/cpu_available_ram.py +93 -0
- model_analyzer/record/types/cpu_used_ram.py +93 -0
- model_analyzer/record/types/gpu_free_memory.py +96 -0
- model_analyzer/record/types/gpu_power_usage.py +107 -0
- model_analyzer/record/types/gpu_total_memory.py +96 -0
- model_analyzer/record/types/gpu_used_memory.py +96 -0
- model_analyzer/record/types/gpu_utilization.py +108 -0
- model_analyzer/record/types/inter_token_latency_avg.py +60 -0
- model_analyzer/record/types/inter_token_latency_base.py +74 -0
- model_analyzer/record/types/inter_token_latency_max.py +60 -0
- model_analyzer/record/types/inter_token_latency_min.py +60 -0
- model_analyzer/record/types/inter_token_latency_p25.py +60 -0
- model_analyzer/record/types/inter_token_latency_p50.py +60 -0
- model_analyzer/record/types/inter_token_latency_p75.py +60 -0
- model_analyzer/record/types/inter_token_latency_p90.py +60 -0
- model_analyzer/record/types/inter_token_latency_p95.py +60 -0
- model_analyzer/record/types/inter_token_latency_p99.py +60 -0
- model_analyzer/record/types/output_token_throughput.py +105 -0
- model_analyzer/record/types/perf_client_response_wait.py +97 -0
- model_analyzer/record/types/perf_client_send_recv.py +97 -0
- model_analyzer/record/types/perf_latency.py +111 -0
- model_analyzer/record/types/perf_latency_avg.py +60 -0
- model_analyzer/record/types/perf_latency_base.py +74 -0
- model_analyzer/record/types/perf_latency_p90.py +60 -0
- model_analyzer/record/types/perf_latency_p95.py +60 -0
- model_analyzer/record/types/perf_latency_p99.py +60 -0
- model_analyzer/record/types/perf_server_compute_infer.py +97 -0
- model_analyzer/record/types/perf_server_compute_input.py +97 -0
- model_analyzer/record/types/perf_server_compute_output.py +97 -0
- model_analyzer/record/types/perf_server_queue.py +97 -0
- model_analyzer/record/types/perf_throughput.py +105 -0
- model_analyzer/record/types/time_to_first_token_avg.py +60 -0
- model_analyzer/record/types/time_to_first_token_base.py +74 -0
- model_analyzer/record/types/time_to_first_token_max.py +60 -0
- model_analyzer/record/types/time_to_first_token_min.py +60 -0
- model_analyzer/record/types/time_to_first_token_p25.py +60 -0
- model_analyzer/record/types/time_to_first_token_p50.py +60 -0
- model_analyzer/record/types/time_to_first_token_p75.py +60 -0
- model_analyzer/record/types/time_to_first_token_p90.py +60 -0
- model_analyzer/record/types/time_to_first_token_p95.py +60 -0
- model_analyzer/record/types/time_to_first_token_p99.py +60 -0
- model_analyzer/reports/__init__.py +15 -0
- model_analyzer/reports/html_report.py +195 -0
- model_analyzer/reports/pdf_report.py +50 -0
- model_analyzer/reports/report.py +86 -0
- model_analyzer/reports/report_factory.py +62 -0
- model_analyzer/reports/report_manager.py +1376 -0
- model_analyzer/reports/report_utils.py +42 -0
- model_analyzer/result/__init__.py +15 -0
- model_analyzer/result/constraint_manager.py +150 -0
- model_analyzer/result/model_config_measurement.py +354 -0
- model_analyzer/result/model_constraints.py +105 -0
- model_analyzer/result/parameter_search.py +246 -0
- model_analyzer/result/result_manager.py +430 -0
- model_analyzer/result/result_statistics.py +159 -0
- model_analyzer/result/result_table.py +217 -0
- model_analyzer/result/result_table_manager.py +646 -0
- model_analyzer/result/result_utils.py +42 -0
- model_analyzer/result/results.py +277 -0
- model_analyzer/result/run_config_measurement.py +658 -0
- model_analyzer/result/run_config_result.py +210 -0
- model_analyzer/result/run_config_result_comparator.py +110 -0
- model_analyzer/result/sorted_results.py +151 -0
- model_analyzer/state/__init__.py +15 -0
- model_analyzer/state/analyzer_state.py +76 -0
- model_analyzer/state/analyzer_state_manager.py +215 -0
- model_analyzer/triton/__init__.py +15 -0
- model_analyzer/triton/client/__init__.py +15 -0
- model_analyzer/triton/client/client.py +234 -0
- model_analyzer/triton/client/client_factory.py +57 -0
- model_analyzer/triton/client/grpc_client.py +104 -0
- model_analyzer/triton/client/http_client.py +107 -0
- model_analyzer/triton/model/__init__.py +15 -0
- model_analyzer/triton/model/model_config.py +556 -0
- model_analyzer/triton/model/model_config_variant.py +29 -0
- model_analyzer/triton/server/__init__.py +15 -0
- model_analyzer/triton/server/server.py +76 -0
- model_analyzer/triton/server/server_config.py +269 -0
- model_analyzer/triton/server/server_docker.py +229 -0
- model_analyzer/triton/server/server_factory.py +306 -0
- model_analyzer/triton/server/server_local.py +158 -0
- triton_model_analyzer-1.48.0.dist-info/METADATA +52 -0
- triton_model_analyzer-1.48.0.dist-info/RECORD +204 -0
- triton_model_analyzer-1.48.0.dist-info/WHEEL +5 -0
- triton_model_analyzer-1.48.0.dist-info/entry_points.txt +2 -0
- triton_model_analyzer-1.48.0.dist-info/licenses/LICENSE +67 -0
- triton_model_analyzer-1.48.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenMin(TimeToFirstTokenBase):
    """
    Record type for the minimum time-to-first-token metric,
    registered under the tag "time_to_first_token_min"
    """

    tag = "time_to_first_token_min"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "Min Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP25(TimeToFirstTokenBase):
    """
    Record type for the p25 time-to-first-token metric,
    registered under the tag "time_to_first_token_p25"
    """

    tag = "time_to_first_token_p25"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p25 Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP50(TimeToFirstTokenBase):
    """
    Record type for the p50 time-to-first-token metric,
    registered under the tag "time_to_first_token_p50"
    """

    tag = "time_to_first_token_p50"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p50 Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP75(TimeToFirstTokenBase):
    """
    Record type for the p75 time-to-first-token metric,
    registered under the tag "time_to_first_token_p75"
    """

    tag = "time_to_first_token_p75"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p75 Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP90(TimeToFirstTokenBase):
    """
    Record type for the p90 time-to-first-token metric,
    registered under the tag "time_to_first_token_p90"
    """

    tag = "time_to_first_token_p90"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p90 Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP95(TimeToFirstTokenBase):
    """
    Record type for the p95 time-to-first-token metric,
    registered under the tag "time_to_first_token_p95"
    """

    tag = "time_to_first_token_p95"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p95 Time To First Token (ms)"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
from functools import total_ordering
|
|
18
|
+
|
|
19
|
+
from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@total_ordering
class TimeToFirstTokenP99(TimeToFirstTokenBase):
    """
    Record type for the p99 time-to-first-token metric,
    registered under the tag "time_to_first_token_p99"
    """

    tag = "time_to_first_token_p99"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency value taken from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        # All storage is handled by the base record type
        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Not consulted by this record type; the
            returned header is the same either way.

        Returns
        -------
        str
            The display name of the metric,
            including its unit.
        """

        return "p99 Time To First Token (ms)"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import base64
|
|
18
|
+
|
|
19
|
+
from .report import Report
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class HTMLReport(Report):
    """
    A report that gets constructed in html.

    Content is accumulated in two strings: ``_head``, which is emitted
    inside a ``<style>`` element of the document head, and ``_body``,
    which the ``add_*`` methods append markup to.
    """

    def __init__(self, html_report=None):
        """
        Parameters
        ----------
        html_report : HTMLReport or None
            An existing report whose ``_head`` and ``_body``
            content this report starts from. When None the
            report starts out empty.
        """

        if html_report is not None:
            self._head = html_report._head
            self._body = html_report._body
        else:
            self._head = ""
            self._body = ""

    def head(self):
        """
        Get the head section of
        the html document

        Returns
        -------
        str
            The accumulated head content wrapped
            in <head><style>...</style></head>
        """

        return f"<head><style>{self._head}</style></head>"

    def body(self):
        """
        Get the body section of
        the html document

        Returns
        -------
        str
            The accumulated body content
            wrapped in <body>...</body>
        """

        return f"<body>{self._body}</body>"

    def document(self):
        """
        Get the html content of
        this HTMLReport

        Returns
        -------
        str
            The head and body sections
            wrapped in <html>...</html>
        """

        return f"<html>{self.head()}{self.body()}</html>"

    def add_title(self, title):
        """
        Parameters
        ----------
        title: str
            The title of the report. It is inserted
            as-is into a centered <h1> element.
        """

        self._body += f"<center><h1>{title}</h1></center>"

    def add_subheading(self, subheading):
        """
        Parameters
        ----------
        subheading: str
            The subheading of the given section. It is
            inserted as-is into an <h3> element.
        """

        self._body += f"<h3>{subheading}</h3>"

    def add_images(self, images, image_captions, image_width=100, float="center"):
        """
        Append one row of images, each embedded as a base64 data URI.

        Parameters
        ----------
        images: list of str
            The fullpaths to the images to
            be added to this image row
        image_captions : list of str
            List of image captions, paired with
            images by position
        image_width: int
            Percentage of the width that the row of
            images will occupy. Each image's div gets
            image_width // len(images) percent.
        float: str
            Alignment of the div containing each image in the row
        """

        cells = []
        for img, caption in zip(images, image_captions):
            with open(img, "rb") as image_file:
                data_uri = base64.b64encode(image_file.read()).decode("ascii")

            # The width is computed inside the loop on purpose: an empty
            # `images` list must not trigger a division by zero.
            cells.append(
                f'<div class="image" style="float:{float};width:{image_width//len(images)}%">'
                f'<img src="data:image/png;base64,{data_uri}" style="width:100%">'
                f'<center><div style="font-weight:bold;font-size:12;padding-bottom:20px">{caption}</div></center>'
                "</div>"
            )

        image_row = "".join(cells)
        self._body += f"<center><div>{image_row}</div></center>"

    def add_paragraph(self, paragraph, font_size=14):
        """
        Parameters
        ----------
        paragraph: str
            The text to add to
            the report as a paragraph
        font_size: int
            The value written into the font-size
            style of the div wrapping the paragraph
        """

        self._body += f'<div style="font-size:{font_size}"><p>{paragraph}</p></div>'

    def add_line_breaks(self, num_breaks=1):
        """
        Parameters
        ----------
        num_breaks: int
            The number of <br> line breaks
            to append to the report body
        """

        for _ in range(num_breaks):
            self._body += "<br>"

    def add_table(self, table):
        """
        Parameters
        ----------
        table: ResultTable
            The table we want to add. Its headers(), size()
            and get_row_by_index() methods are used to
            build the header row and the data rows.
        """

        def table_style(
            border="1px solid black",
            padding="5px 10px",
            font_size="11pt",
            text_align="center",
            width="80%",
        ):
            return (
                f"border: {border};"
                f"border-collapse: collapse;"
                f"text-align: {text_align};"
                f"width: {width};"
                f"padding: {padding};"
                f"font-size: {font_size}"
            )

        # The same inline style is used for the table and for every
        # cell, so it is built once rather than once per cell.
        style = table_style()

        # Add headers
        headers = "".join(f'<th style="{style}">{h}</th>' for h in table.headers())
        rows = [f"<tr>{headers}</tr>"]

        # Add data
        for i in range(table.size()):
            row_data = "".join(
                f'<td style="{style}">{d}</td>' for d in table.get_row_by_index(i)
            )
            rows.append(f"<tr>{row_data}</tr>")

        # Wrap with table details
        table_rows = "".join(rows)
        html_table = f'<table style="{style}">{table_rows}</table>'
        self._body += f"<center>{html_table}</center>"

    def write_report(self, filename):
        """
        Write the report to disk with
        filename

        Parameters
        ----------
        filename : str
            The name of the report
        """

        with open(f"{filename}", "w") as file:
            file.write(self.document())

    def get_file_extension(self):
        """
        Return the file extension for
        the type of report
        """

        return "html"
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
import pdfkit
|
|
18
|
+
|
|
19
|
+
from .html_report import HTMLReport
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PDFReport(HTMLReport):
    """
    An HTMLReport whose document is
    rendered to PDF when it is written
    """

    def __init__(self, html_report=None):
        """
        Parameters
        ----------
        html_report : HTMLReport or None
            Forwarded unchanged to the
            HTMLReport constructor
        """

        super().__init__(html_report)

    def write_report(self, filename):
        """
        Convert the html document to PDF
        and write it to disk with filename

        Parameters
        ----------
        filename : str
            The name of the report
        """

        html_document = self.document()
        output_path = f"{filename}"
        pdfkit.from_string(html_document, output_path, options={"quiet": ""})

    def get_file_extension(self):
        """
        Return the file extension for
        the type of report
        """

        return "pdf"
|