truss 0.10.0rc1__py3-none-any.whl → 0.60.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss might be problematic. Click here for more details.
- truss/__init__.py +10 -3
- truss/api/__init__.py +123 -0
- truss/api/definitions.py +51 -0
- truss/base/constants.py +116 -0
- truss/base/custom_types.py +29 -0
- truss/{errors.py → base/errors.py} +4 -0
- truss/base/trt_llm_config.py +310 -0
- truss/{truss_config.py → base/truss_config.py} +344 -31
- truss/{truss_spec.py → base/truss_spec.py} +20 -6
- truss/{validation.py → base/validation.py} +60 -11
- truss/cli/cli.py +841 -88
- truss/{remote → cli}/remote_cli.py +2 -7
- truss/contexts/docker_build_setup.py +67 -0
- truss/contexts/image_builder/cache_warmer.py +2 -8
- truss/contexts/image_builder/image_builder.py +1 -1
- truss/contexts/image_builder/serving_image_builder.py +292 -46
- truss/contexts/image_builder/util.py +1 -3
- truss/contexts/local_loader/docker_build_emulator.py +58 -0
- truss/contexts/local_loader/load_model_local.py +2 -2
- truss/contexts/local_loader/truss_module_loader.py +1 -1
- truss/contexts/local_loader/utils.py +1 -1
- truss/local/local_config.py +2 -6
- truss/local/local_config_handler.py +20 -5
- truss/patch/__init__.py +1 -0
- truss/patch/hash.py +4 -70
- truss/patch/signature.py +4 -16
- truss/patch/truss_dir_patch_applier.py +3 -78
- truss/remote/baseten/api.py +308 -23
- truss/remote/baseten/auth.py +3 -3
- truss/remote/baseten/core.py +257 -50
- truss/remote/baseten/custom_types.py +44 -0
- truss/remote/baseten/error.py +4 -0
- truss/remote/baseten/remote.py +369 -118
- truss/remote/baseten/service.py +118 -11
- truss/remote/baseten/utils/status.py +29 -0
- truss/remote/baseten/utils/tar.py +34 -22
- truss/remote/baseten/utils/transfer.py +36 -23
- truss/remote/remote_factory.py +14 -5
- truss/remote/truss_remote.py +72 -45
- truss/templates/base.Dockerfile.jinja +18 -16
- truss/templates/cache.Dockerfile.jinja +3 -3
- truss/{server → templates/control}/control/application.py +14 -35
- truss/{server → templates/control}/control/endpoints.py +39 -9
- truss/{server/control/patch/types.py → templates/control/control/helpers/custom_types.py} +13 -52
- truss/{server → templates/control}/control/helpers/inference_server_controller.py +4 -8
- truss/{server → templates/control}/control/helpers/inference_server_process_controller.py +2 -4
- truss/{server → templates/control}/control/helpers/inference_server_starter.py +5 -10
- truss/{server/control → templates/control/control/helpers}/truss_patch/model_code_patch_applier.py +8 -6
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/model_container_patch_applier.py +18 -26
- truss/templates/control/control/helpers/truss_patch/requirement_name_identifier.py +66 -0
- truss/{server → templates/control}/control/server.py +11 -6
- truss/templates/control/requirements.txt +9 -0
- truss/templates/custom_python_dx/my_model.py +28 -0
- truss/templates/docker_server/proxy.conf.jinja +42 -0
- truss/templates/docker_server/supervisord.conf.jinja +27 -0
- truss/templates/docker_server_requirements.txt +1 -0
- truss/templates/server/common/errors.py +231 -0
- truss/{server → templates/server}/common/patches/whisper/patch.py +1 -0
- truss/{server/common/patches/__init__.py → templates/server/common/patches.py} +1 -3
- truss/{server → templates/server}/common/retry.py +1 -0
- truss/{server → templates/server}/common/schema.py +11 -9
- truss/templates/server/common/tracing.py +157 -0
- truss/templates/server/main.py +9 -0
- truss/templates/server/model_wrapper.py +961 -0
- truss/templates/server/requirements.txt +21 -0
- truss/templates/server/truss_server.py +447 -0
- truss/templates/server.Dockerfile.jinja +62 -14
- truss/templates/shared/dynamic_config_resolver.py +28 -0
- truss/templates/shared/lazy_data_resolver.py +164 -0
- truss/templates/shared/log_config.py +125 -0
- truss/{server → templates}/shared/secrets_resolver.py +1 -2
- truss/{server → templates}/shared/serialization.py +31 -9
- truss/{server → templates}/shared/util.py +3 -13
- truss/templates/trtllm-audio/model/model.py +49 -0
- truss/templates/trtllm-audio/packages/sigint_patch.py +14 -0
- truss/templates/trtllm-audio/packages/whisper_trt/__init__.py +215 -0
- truss/templates/trtllm-audio/packages/whisper_trt/assets.py +25 -0
- truss/templates/trtllm-audio/packages/whisper_trt/batching.py +52 -0
- truss/templates/trtllm-audio/packages/whisper_trt/custom_types.py +26 -0
- truss/templates/trtllm-audio/packages/whisper_trt/modeling.py +184 -0
- truss/templates/trtllm-audio/packages/whisper_trt/tokenizer.py +185 -0
- truss/templates/trtllm-audio/packages/whisper_trt/utils.py +245 -0
- truss/templates/trtllm-briton/src/extension.py +64 -0
- truss/tests/conftest.py +302 -94
- truss/tests/contexts/image_builder/test_serving_image_builder.py +74 -31
- truss/tests/contexts/local_loader/test_load_local.py +2 -2
- truss/tests/contexts/local_loader/test_truss_module_finder.py +1 -1
- truss/tests/patch/test_calc_patch.py +439 -127
- truss/tests/patch/test_dir_signature.py +3 -12
- truss/tests/patch/test_hash.py +1 -1
- truss/tests/patch/test_signature.py +1 -1
- truss/tests/patch/test_truss_dir_patch_applier.py +23 -11
- truss/tests/patch/test_types.py +2 -2
- truss/tests/remote/baseten/test_api.py +153 -58
- truss/tests/remote/baseten/test_auth.py +2 -1
- truss/tests/remote/baseten/test_core.py +160 -12
- truss/tests/remote/baseten/test_remote.py +489 -77
- truss/tests/remote/baseten/test_service.py +55 -0
- truss/tests/remote/test_remote_factory.py +16 -18
- truss/tests/remote/test_truss_remote.py +26 -17
- truss/tests/templates/control/control/helpers/test_context_managers.py +11 -0
- truss/tests/templates/control/control/helpers/test_model_container_patch_applier.py +184 -0
- truss/tests/templates/control/control/helpers/test_requirement_name_identifier.py +89 -0
- truss/tests/{server → templates/control}/control/test_server.py +79 -24
- truss/tests/{server → templates/control}/control/test_server_integration.py +24 -16
- truss/tests/templates/core/server/test_dynamic_config_resolver.py +108 -0
- truss/tests/templates/core/server/test_lazy_data_resolver.py +329 -0
- truss/tests/templates/core/server/test_lazy_data_resolver_v2.py +79 -0
- truss/tests/{server → templates}/core/server/test_secrets_resolver.py +1 -1
- truss/tests/{server → templates/server}/common/test_retry.py +3 -3
- truss/tests/templates/server/test_model_wrapper.py +248 -0
- truss/tests/{server → templates/server}/test_schema.py +3 -5
- truss/tests/{server/core/server/common → templates/server}/test_truss_server.py +8 -5
- truss/tests/test_build.py +9 -52
- truss/tests/test_config.py +336 -77
- truss/tests/test_context_builder_image.py +3 -11
- truss/tests/test_control_truss_patching.py +7 -12
- truss/tests/test_custom_server.py +38 -0
- truss/tests/test_data/context_builder_image_test/test.py +3 -0
- truss/tests/test_data/happy.ipynb +56 -0
- truss/tests/test_data/model_load_failure_test/config.yaml +2 -0
- truss/tests/test_data/model_load_failure_test/model/__init__.py +0 -0
- truss/tests/test_data/patch_ping_test_server/__init__.py +0 -0
- truss/{test_data → tests/test_data}/patch_ping_test_server/app.py +3 -9
- truss/{test_data → tests/test_data}/server.Dockerfile +20 -21
- truss/tests/test_data/server_conformance_test_truss/__init__.py +0 -0
- truss/tests/test_data/server_conformance_test_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/model/model.py +1 -3
- truss/tests/test_data/test_async_truss/__init__.py +0 -0
- truss/tests/test_data/test_async_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/config.yaml +16 -0
- truss/tests/test_data/test_basic_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/config.yaml +13 -0
- truss/tests/test_data/test_build_commands/model/__init__.py +0 -0
- truss/{test_data/test_streaming_async_generator_truss → tests/test_data/test_build_commands}/model/model.py +2 -3
- truss/tests/test_data/test_build_commands_failure/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/config.yaml +14 -0
- truss/tests/test_data/test_build_commands_failure/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/model/model.py +17 -0
- truss/tests/test_data/test_concurrency_truss/__init__.py +0 -0
- truss/tests/test_data/test_concurrency_truss/config.yaml +4 -0
- truss/tests/test_data/test_concurrency_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/config.yaml +20 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/Dockerfile +17 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/README.md +10 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/VERSION +1 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/app.py +19 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/build_upload_new_image.sh +6 -0
- truss/tests/test_data/test_openai/__init__.py +0 -0
- truss/{test_data/test_basic_truss → tests/test_data/test_openai}/config.yaml +1 -2
- truss/tests/test_data/test_openai/model/__init__.py +0 -0
- truss/tests/test_data/test_openai/model/model.py +15 -0
- truss/tests/test_data/test_pyantic_v1/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/model.py +28 -0
- truss/tests/test_data/test_pyantic_v1/requirements.txt +1 -0
- truss/tests/test_data/test_pyantic_v2/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/config.yaml +13 -0
- truss/tests/test_data/test_pyantic_v2/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/model/model.py +30 -0
- truss/tests/test_data/test_pyantic_v2/requirements.txt +1 -0
- truss/tests/test_data/test_requirements_file_truss/__init__.py +0 -0
- truss/tests/test_data/test_requirements_file_truss/config.yaml +13 -0
- truss/tests/test_data/test_requirements_file_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_requirements_file_truss/model/model.py +1 -0
- truss/tests/test_data/test_streaming_async_generator_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/model.py +7 -0
- truss/tests/test_data/test_streaming_read_timeout/__init__.py +0 -0
- truss/tests/test_data/test_streaming_read_timeout/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss_with_error/model/model.py +3 -11
- truss/tests/test_data/test_streaming_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/config.yaml +43 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/model.py +65 -0
- truss/tests/test_data/test_trt_llm_truss/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/config.yaml +15 -0
- truss/tests/test_data/test_trt_llm_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/model/model.py +15 -0
- truss/tests/test_data/test_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss/config.yaml +4 -0
- truss/tests/test_data/test_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss/model/dummy +0 -0
- truss/tests/test_data/test_truss/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss/packages/test_package/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/config.yaml +4 -0
- truss/tests/test_data/test_truss_with_error/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/model/model.py +8 -0
- truss/tests/test_data/test_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_docker.py +2 -1
- truss/tests/test_model_inference.py +1340 -292
- truss/tests/test_model_schema.py +33 -26
- truss/tests/test_testing_utilities_for_other_tests.py +50 -5
- truss/tests/test_truss_gatherer.py +3 -5
- truss/tests/test_truss_handle.py +62 -59
- truss/tests/test_util.py +2 -1
- truss/tests/test_validation.py +15 -13
- truss/tests/trt_llm/test_trt_llm_config.py +41 -0
- truss/tests/trt_llm/test_validation.py +91 -0
- truss/tests/util/test_config_checks.py +40 -0
- truss/tests/util/test_env_vars.py +14 -0
- truss/tests/util/test_path.py +10 -23
- truss/trt_llm/config_checks.py +43 -0
- truss/trt_llm/validation.py +42 -0
- truss/truss_handle/__init__.py +0 -0
- truss/truss_handle/build.py +122 -0
- truss/{decorators.py → truss_handle/decorators.py} +1 -1
- truss/truss_handle/patch/__init__.py +0 -0
- truss/{patch → truss_handle/patch}/calc_patch.py +146 -92
- truss/{types.py → truss_handle/patch/custom_types.py} +35 -27
- truss/{patch → truss_handle/patch}/dir_signature.py +1 -1
- truss/truss_handle/patch/hash.py +71 -0
- truss/{patch → truss_handle/patch}/local_truss_patch_applier.py +6 -4
- truss/truss_handle/patch/signature.py +22 -0
- truss/truss_handle/patch/truss_dir_patch_applier.py +87 -0
- truss/{readme_generator.py → truss_handle/readme_generator.py} +3 -2
- truss/{truss_gatherer.py → truss_handle/truss_gatherer.py} +3 -2
- truss/{truss_handle.py → truss_handle/truss_handle.py} +174 -78
- truss/util/.truss_ignore +3 -0
- truss/{docker.py → util/docker.py} +6 -2
- truss/util/download.py +6 -15
- truss/util/env_vars.py +41 -0
- truss/util/log_utils.py +52 -0
- truss/util/path.py +20 -20
- truss/util/requirements.py +11 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/METADATA +18 -16
- truss-0.60.0.dist-info/RECORD +324 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/WHEEL +1 -1
- truss-0.60.0.dist-info/entry_points.txt +4 -0
- truss_chains/__init__.py +71 -0
- truss_chains/definitions.py +756 -0
- truss_chains/deployment/__init__.py +0 -0
- truss_chains/deployment/code_gen.py +816 -0
- truss_chains/deployment/deployment_client.py +871 -0
- truss_chains/framework.py +1480 -0
- truss_chains/public_api.py +231 -0
- truss_chains/py.typed +0 -0
- truss_chains/pydantic_numpy.py +131 -0
- truss_chains/reference_code/reference_chainlet.py +34 -0
- truss_chains/reference_code/reference_model.py +10 -0
- truss_chains/remote_chainlet/__init__.py +0 -0
- truss_chains/remote_chainlet/model_skeleton.py +60 -0
- truss_chains/remote_chainlet/stub.py +380 -0
- truss_chains/remote_chainlet/utils.py +332 -0
- truss_chains/streaming.py +378 -0
- truss_chains/utils.py +178 -0
- CODE_OF_CONDUCT.md +0 -131
- CONTRIBUTING.md +0 -48
- README.md +0 -137
- context_builder.Dockerfile +0 -24
- truss/blob/blob_backend.py +0 -10
- truss/blob/blob_backend_registry.py +0 -23
- truss/blob/http_public_blob_backend.py +0 -23
- truss/build/__init__.py +0 -2
- truss/build/build.py +0 -143
- truss/build/configure.py +0 -63
- truss/cli/__init__.py +0 -2
- truss/cli/console.py +0 -5
- truss/cli/create.py +0 -5
- truss/config/trt_llm.py +0 -81
- truss/constants.py +0 -61
- truss/model_inference.py +0 -123
- truss/patch/types.py +0 -30
- truss/pytest.ini +0 -7
- truss/server/common/errors.py +0 -100
- truss/server/common/termination_handler_middleware.py +0 -64
- truss/server/common/truss_server.py +0 -389
- truss/server/control/patch/model_code_patch_applier.py +0 -46
- truss/server/control/patch/requirement_name_identifier.py +0 -17
- truss/server/inference_server.py +0 -29
- truss/server/model_wrapper.py +0 -434
- truss/server/shared/logging.py +0 -81
- truss/templates/trtllm/model/model.py +0 -97
- truss/templates/trtllm/packages/build_engine_utils.py +0 -34
- truss/templates/trtllm/packages/constants.py +0 -11
- truss/templates/trtllm/packages/schema.py +0 -216
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/ensemble/config.pbtxt +0 -246
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/1/model.py +0 -181
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/config.pbtxt +0 -64
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/1/model.py +0 -260
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/config.pbtxt +0 -99
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/tensorrt_llm/config.pbtxt +0 -208
- truss/templates/trtllm/packages/triton_client.py +0 -150
- truss/templates/trtllm/packages/utils.py +0 -43
- truss/test_data/context_builder_image_test/test.py +0 -4
- truss/test_data/happy.ipynb +0 -54
- truss/test_data/model_load_failure_test/config.yaml +0 -2
- truss/test_data/test_concurrency_truss/config.yaml +0 -2
- truss/test_data/test_streaming_async_generator_truss/config.yaml +0 -2
- truss/test_data/test_streaming_truss/config.yaml +0 -3
- truss/test_data/test_truss/config.yaml +0 -2
- truss/tests/server/common/test_termination_handler_middleware.py +0 -93
- truss/tests/server/control/test_model_container_patch_applier.py +0 -203
- truss/tests/server/core/server/common/test_util.py +0 -19
- truss/tests/server/test_model_wrapper.py +0 -87
- truss/util/data_structures.py +0 -16
- truss-0.10.0rc1.dist-info/RECORD +0 -216
- truss-0.10.0rc1.dist-info/entry_points.txt +0 -3
- truss/{server/shared → base}/__init__.py +0 -0
- truss/{server → templates/control}/control/helpers/context_managers.py +0 -0
- truss/{server/control → templates/control/control/helpers}/errors.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/__init__.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/system_packages.py +0 -0
- truss/{test_data/annotated_types_truss/model → templates/server}/__init__.py +0 -0
- truss/{server → templates/server}/common/__init__.py +0 -0
- truss/{test_data/gcs_fix/model → templates/shared}/__init__.py +0 -0
- truss/templates/{trtllm → trtllm-briton}/README.md +0 -0
- truss/{test_data/server_conformance_test_truss/model → tests/test_data}/__init__.py +0 -0
- truss/{test_data/test_basic_truss/model → tests/test_data/annotated_types_truss}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/config.yaml +0 -0
- truss/{test_data/test_requirements_file_truss → tests/test_data/annotated_types_truss}/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/auto-mpg.data +0 -0
- truss/{test_data → tests/test_data}/context_builder_image_test/Dockerfile +0 -0
- truss/{test_data/test_truss/model → tests/test_data/context_builder_image_test}/__init__.py +0 -0
- truss/{test_data/test_truss_server_caching_truss/model → tests/test_data/gcs_fix}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/config.yaml +0 -0
- truss/tests/{local → test_data/gcs_fix/model}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/model/model.py +0 -0
- truss/{test_data/test_truss/model/dummy → tests/test_data/model_load_failure_test/__init__.py} +0 -0
- truss/{test_data → tests/test_data}/model_load_failure_test/model/model.py +0 -0
- truss/{test_data → tests/test_data}/pima-indians-diabetes.csv +0 -0
- truss/{test_data → tests/test_data}/readme_int_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_no_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_str_example.md +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/model/model.py +3 -3
- truss/{test_data → tests/test_data}/test_basic_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/test_concurrency_truss/model/model.py +0 -0
- truss/{test_data/test_requirements_file_truss → tests/test_data/test_pyantic_v1}/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_requirements_file_truss/requirements.txt +0 -0
- truss/{test_data → tests/test_data}/test_streaming_read_timeout/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_streaming_read_timeout/model/model.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss_with_error/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_truss/examples.yaml +0 -0
- truss/{test_data → tests/test_data}/test_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/test_truss/packages/test_package/test.py +0 -0
- truss/{test_data → tests/test_data}/test_truss_server_caching_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_truss_server_caching_truss/model/model.py +0 -0
- truss/{patch → truss_handle/patch}/constants.py +0 -0
- truss/{notebook.py → util/notebook.py} +0 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/LICENSE +0 -0
|
@@ -1,21 +1,30 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
2
4
|
from dataclasses import _MISSING_TYPE, dataclass, field, fields
|
|
3
5
|
from enum import Enum
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import Any, Dict, List, Optional
|
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
6
8
|
|
|
7
9
|
import yaml
|
|
8
|
-
|
|
9
|
-
from truss.constants import HTTP_PUBLIC_BLOB_BACKEND
|
|
10
|
-
from truss.
|
|
11
|
-
from truss.
|
|
12
|
-
from truss.
|
|
13
|
-
|
|
10
|
+
|
|
11
|
+
from truss.base.constants import HTTP_PUBLIC_BLOB_BACKEND
|
|
12
|
+
from truss.base.custom_types import ModelFrameworkType
|
|
13
|
+
from truss.base.errors import ValidationError
|
|
14
|
+
from truss.base.trt_llm_config import (
|
|
15
|
+
TRTLLMConfiguration,
|
|
16
|
+
TrussTRTLLMBuildConfiguration,
|
|
17
|
+
TrussTRTLLMQuantizationType,
|
|
18
|
+
)
|
|
19
|
+
from truss.base.validation import (
|
|
14
20
|
validate_cpu_spec,
|
|
15
21
|
validate_memory_spec,
|
|
22
|
+
validate_node_count,
|
|
16
23
|
validate_python_executable_path,
|
|
17
24
|
validate_secret_name,
|
|
25
|
+
validate_secret_to_path_mapping,
|
|
18
26
|
)
|
|
27
|
+
from truss.util.requirements import parse_requirement_string
|
|
19
28
|
|
|
20
29
|
DEFAULT_MODEL_FRAMEWORK_TYPE = ModelFrameworkType.CUSTOM
|
|
21
30
|
DEFAULT_MODEL_TYPE = "Model"
|
|
@@ -30,17 +39,27 @@ DEFAULT_DATA_DIRECTORY = "data"
|
|
|
30
39
|
DEFAULT_EXAMPLES_FILENAME = "examples.yaml"
|
|
31
40
|
DEFAULT_SPEC_VERSION = "2.0"
|
|
32
41
|
DEFAULT_PREDICT_CONCURRENCY = 1
|
|
33
|
-
DEFAULT_NUM_WORKERS = 1
|
|
34
42
|
DEFAULT_STREAMING_RESPONSE_READ_TIMEOUT = 60
|
|
35
|
-
|
|
43
|
+
DEFAULT_ENABLE_TRACING_DATA = False # This should be in sync with tracing.py.
|
|
36
44
|
DEFAULT_CPU = "1"
|
|
37
45
|
DEFAULT_MEMORY = "2Gi"
|
|
38
46
|
DEFAULT_USE_GPU = False
|
|
39
47
|
|
|
40
48
|
DEFAULT_BLOB_BACKEND = HTTP_PUBLIC_BLOB_BACKEND
|
|
41
49
|
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
VALID_PYTHON_VERSIONS = ["py38", "py39", "py310", "py311"]
|
|
51
|
+
|
|
52
|
+
X = TypeVar("X")
|
|
53
|
+
Y = TypeVar("Y")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def transform_optional(x: Optional[X], fn: Callable[[X], Optional[Y]]) -> Optional[Y]:
|
|
57
|
+
if x is None:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
return fn(x)
|
|
61
|
+
|
|
62
|
+
|
|
44
63
|
logger = logging.getLogger(__name__)
|
|
45
64
|
|
|
46
65
|
|
|
@@ -51,6 +70,8 @@ class Accelerator(Enum):
|
|
|
51
70
|
V100 = "V100"
|
|
52
71
|
A100 = "A100"
|
|
53
72
|
H100 = "H100"
|
|
73
|
+
H200 = "H200"
|
|
74
|
+
H100_40GB = "H100_40GB"
|
|
54
75
|
|
|
55
76
|
|
|
56
77
|
@dataclass
|
|
@@ -133,31 +154,66 @@ class ModelCache:
|
|
|
133
154
|
return [model.to_dict(verbose=verbose) for model in self.models]
|
|
134
155
|
|
|
135
156
|
|
|
157
|
+
@dataclass
|
|
158
|
+
class HealthChecks:
|
|
159
|
+
restart_check_delay_seconds: Optional[int] = None
|
|
160
|
+
restart_threshold_seconds: Optional[int] = None
|
|
161
|
+
stop_traffic_threshold_seconds: Optional[int] = None
|
|
162
|
+
|
|
163
|
+
@staticmethod
|
|
164
|
+
def from_dict(d):
|
|
165
|
+
return HealthChecks(
|
|
166
|
+
restart_check_delay_seconds=d.get("restart_check_delay_seconds"),
|
|
167
|
+
restart_threshold_seconds=d.get("restart_threshold_seconds"),
|
|
168
|
+
stop_traffic_threshold_seconds=d.get("stop_traffic_threshold_seconds"),
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
def to_dict(self):
|
|
172
|
+
return {
|
|
173
|
+
"restart_check_delay_seconds": self.restart_check_delay_seconds,
|
|
174
|
+
"restart_threshold_seconds": self.restart_threshold_seconds,
|
|
175
|
+
"stop_traffic_threshold_seconds": self.stop_traffic_threshold_seconds,
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
|
|
136
179
|
@dataclass
|
|
137
180
|
class Runtime:
|
|
138
181
|
predict_concurrency: int = DEFAULT_PREDICT_CONCURRENCY
|
|
139
|
-
num_workers: int = DEFAULT_NUM_WORKERS
|
|
140
182
|
streaming_read_timeout: int = DEFAULT_STREAMING_RESPONSE_READ_TIMEOUT
|
|
183
|
+
enable_tracing_data: bool = DEFAULT_ENABLE_TRACING_DATA
|
|
184
|
+
enable_debug_logs: bool = False
|
|
185
|
+
health_checks: HealthChecks = field(default_factory=HealthChecks)
|
|
141
186
|
|
|
142
187
|
@staticmethod
|
|
143
188
|
def from_dict(d):
|
|
144
189
|
predict_concurrency = d.get("predict_concurrency", DEFAULT_PREDICT_CONCURRENCY)
|
|
145
|
-
num_workers = d.get("num_workers", DEFAULT_NUM_WORKERS)
|
|
190
|
+
num_workers = d.get("num_workers", 1)
|
|
191
|
+
if num_workers != 1:
|
|
192
|
+
raise ValueError(
|
|
193
|
+
"After truss 0.9.49 only 1 worker per server is allowed. "
|
|
194
|
+
"For concurrency utilize asyncio, autoscaling replicas "
|
|
195
|
+
"and as a last resort thread/process pools inside the "
|
|
196
|
+
"truss model."
|
|
197
|
+
)
|
|
146
198
|
streaming_read_timeout = d.get(
|
|
147
199
|
"streaming_read_timeout", DEFAULT_STREAMING_RESPONSE_READ_TIMEOUT
|
|
148
200
|
)
|
|
201
|
+
enable_tracing_data = d.get("enable_tracing_data", DEFAULT_ENABLE_TRACING_DATA)
|
|
202
|
+
health_checks = HealthChecks.from_dict(d.get("health_checks", {}))
|
|
149
203
|
|
|
150
204
|
return Runtime(
|
|
151
205
|
predict_concurrency=predict_concurrency,
|
|
152
|
-
num_workers=num_workers,
|
|
153
206
|
streaming_read_timeout=streaming_read_timeout,
|
|
207
|
+
enable_tracing_data=enable_tracing_data,
|
|
208
|
+
health_checks=health_checks,
|
|
154
209
|
)
|
|
155
210
|
|
|
156
211
|
def to_dict(self):
|
|
157
212
|
return {
|
|
158
213
|
"predict_concurrency": self.predict_concurrency,
|
|
159
|
-
"num_workers": self.num_workers,
|
|
160
214
|
"streaming_read_timeout": self.streaming_read_timeout,
|
|
215
|
+
"enable_tracing_data": self.enable_tracing_data,
|
|
216
|
+
"health_checks": self.health_checks.to_dict(),
|
|
161
217
|
}
|
|
162
218
|
|
|
163
219
|
|
|
@@ -176,15 +232,23 @@ class ModelServer(Enum):
|
|
|
176
232
|
class Build:
|
|
177
233
|
model_server: ModelServer = ModelServer.TrussServer
|
|
178
234
|
arguments: Dict = field(default_factory=dict)
|
|
235
|
+
secret_to_path_mapping: Dict = field(default_factory=dict)
|
|
179
236
|
|
|
180
237
|
@staticmethod
|
|
181
238
|
def from_dict(d):
|
|
182
239
|
model_server = ModelServer[d.get("model_server", "TrussServer")]
|
|
183
240
|
arguments = d.get("arguments", {})
|
|
241
|
+
secret_to_path_mapping = d.get("secret_to_path_mapping", {})
|
|
242
|
+
validate_secret_to_path_mapping(secret_to_path_mapping)
|
|
243
|
+
if not isinstance(secret_to_path_mapping, dict):
|
|
244
|
+
raise ValueError(
|
|
245
|
+
"Please pass a valid mapping for `secret_to_path_mapping`."
|
|
246
|
+
)
|
|
184
247
|
|
|
185
248
|
return Build(
|
|
186
249
|
model_server=model_server,
|
|
187
250
|
arguments=arguments,
|
|
251
|
+
secret_to_path_mapping=secret_to_path_mapping,
|
|
188
252
|
)
|
|
189
253
|
|
|
190
254
|
def to_dict(self):
|
|
@@ -197,6 +261,7 @@ class Resources:
|
|
|
197
261
|
memory: str = DEFAULT_MEMORY
|
|
198
262
|
use_gpu: bool = DEFAULT_USE_GPU
|
|
199
263
|
accelerator: AcceleratorSpec = field(default_factory=AcceleratorSpec)
|
|
264
|
+
node_count: Optional[int] = None
|
|
200
265
|
|
|
201
266
|
@staticmethod
|
|
202
267
|
def from_dict(d):
|
|
@@ -209,20 +274,26 @@ class Resources:
|
|
|
209
274
|
if accelerator.accelerator is not None:
|
|
210
275
|
use_gpu = True
|
|
211
276
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
)
|
|
277
|
+
r = Resources(cpu=cpu, memory=memory, use_gpu=use_gpu, accelerator=accelerator)
|
|
278
|
+
|
|
279
|
+
# only add node_count if not None. This helps keep
|
|
280
|
+
# config generated by truss init concise.
|
|
281
|
+
node_count = d.get("node_count")
|
|
282
|
+
validate_node_count(node_count)
|
|
283
|
+
r.node_count = node_count
|
|
284
|
+
|
|
285
|
+
return r
|
|
218
286
|
|
|
219
287
|
def to_dict(self):
|
|
220
|
-
|
|
288
|
+
d = {
|
|
221
289
|
"cpu": self.cpu,
|
|
222
290
|
"memory": self.memory,
|
|
223
291
|
"use_gpu": self.use_gpu,
|
|
224
292
|
"accelerator": self.accelerator.to_str(),
|
|
225
293
|
}
|
|
294
|
+
if self.node_count is not None:
|
|
295
|
+
d["node_count"] = self.node_count
|
|
296
|
+
return d
|
|
226
297
|
|
|
227
298
|
|
|
228
299
|
@dataclass
|
|
@@ -296,25 +367,112 @@ class ExternalData:
|
|
|
296
367
|
return [item.to_dict() for item in self.items]
|
|
297
368
|
|
|
298
369
|
|
|
370
|
+
class DockerAuthType(Enum):
|
|
371
|
+
"""
|
|
372
|
+
This enum will express all of the types of registry
|
|
373
|
+
authentication we support.
|
|
374
|
+
"""
|
|
375
|
+
|
|
376
|
+
GCP_SERVICE_ACCOUNT_JSON = "GCP_SERVICE_ACCOUNT_JSON"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
@dataclass
class DockerAuthSettings:
    """
    Provides information about how to authenticate to the docker registry containing
    the custom base image.
    """

    # Which authentication mechanism to use.
    auth_method: DockerAuthType
    # Name of the secret referenced by this auth configuration.
    secret_name: str
    # Registry these settings apply to; empty string when not specified.
    registry: Optional[str] = ""

    @staticmethod
    def from_dict(d: Dict[str, str]) -> "DockerAuthSettings":
        """Build settings from a mapping with `auth_method`, `secret_name`
        and, optionally, `registry`.

        Args:
            d: Mapping holding the raw configuration values.

        Returns:
            The populated `DockerAuthSettings`.

        Raises:
            ValueError: If `secret_name` is missing/empty, or `auth_method` is
                missing or does not match a `DockerAuthType` value
                (case-insensitively).
        """
        auth_method = d.get("auth_method")
        secret_name = d.get("secret_name")

        # Upper-case the auth method so that it can be passed in any casing,
        # e.g. "gcp_service_account_json". Non-string values are treated as
        # invalid instead of failing with an AttributeError on `.upper()`.
        normalized_method = (
            auth_method.upper() if isinstance(auth_method, str) else None
        )
        supported_methods = {auth_type.value for auth_type in DockerAuthType}

        if not secret_name or normalized_method not in supported_methods:
            raise ValueError("Please provide a `secret_name`, and valid `auth_method`")

        return DockerAuthSettings(
            # Look the member up by value, matching the validation above
            # (subscripting would look it up by name instead).
            auth_method=DockerAuthType(normalized_method),
            secret_name=secret_name,
            # Fall back to the dataclass default when the key is absent or null.
            registry=d.get("registry") or "",
        )

    def to_dict(self):
        """Return the settings as a plain dict, with `auth_method` as its
        string value."""
        return {
            "auth_method": self.auth_method.value,
            "secret_name": self.secret_name,
            "registry": self.registry,
        }
|
|
419
|
+
|
|
420
|
+
|
|
299
421
|
@dataclass
class BaseImage:
    """Base image settings: the image reference, the python executable path
    inside it, and optional docker registry auth settings."""

    image: str = ""
    python_executable_path: str = ""
    docker_auth: Optional[DockerAuthSettings] = None

    @staticmethod
    def from_dict(d):
        """Build a `BaseImage` from a mapping.

        Missing `image` / `python_executable_path` default to "". The
        executable path is passed to `validate_python_executable_path` before
        anything is constructed. A falsy `docker_auth` entry yields `None`;
        otherwise it is parsed with `DockerAuthSettings.from_dict`.
        """
        image_ref = d.get("image", "")
        executable_path = d.get("python_executable_path", "")
        raw_auth = d.get("docker_auth")

        validate_python_executable_path(executable_path)

        auth_settings = None
        if raw_auth:
            auth_settings = DockerAuthSettings.from_dict(raw_auth)

        return BaseImage(
            image=image_ref,
            python_executable_path=executable_path,
            docker_auth=auth_settings,
        )

    def to_dict(self):
        """Return the settings as a plain dict; `docker_auth` is serialized
        through `transform_optional`."""
        serialized_auth = transform_optional(
            self.docker_auth, lambda auth: auth.to_dict()
        )
        return {
            "image": self.image,
            "python_executable_path": self.python_executable_path,
            "docker_auth": serialized_auth,
        }
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
@dataclass
class DockerServer:
    """Docker server settings: start command, server port, and the predict,
    readiness and liveness endpoints."""

    start_command: str
    server_port: int
    predict_endpoint: str
    readiness_endpoint: str
    liveness_endpoint: str

    @staticmethod
    def from_dict(d) -> "DockerServer":
        """Build a `DockerServer` from a mapping; keys that are absent are
        stored as `None`."""
        field_names = (
            "start_command",
            "server_port",
            "predict_endpoint",
            "readiness_endpoint",
            "liveness_endpoint",
        )
        return DockerServer(**{name: d.get(name) for name in field_names})

    def to_dict(self):
        """Return the settings as a plain dict."""
        # Key order is part of the serialized output, so it is spelled out
        # explicitly rather than derived from the field declaration order.
        output_order = (
            "start_command",
            "server_port",
            "readiness_endpoint",
            "liveness_endpoint",
            "predict_endpoint",
        )
        return {name: getattr(self, name) for name in output_order}
|
|
319
477
|
|
|
320
478
|
|
|
@@ -375,6 +533,7 @@ class TrussConfig:
|
|
|
375
533
|
memory: 14Gi
|
|
376
534
|
use_gpu: true
|
|
377
535
|
accelerator: A10G
|
|
536
|
+
node_count: 2
|
|
378
537
|
```
|
|
379
538
|
secrets (Dict[str, str]):
|
|
380
539
|
<Warning>
|
|
@@ -434,16 +593,25 @@ class TrussConfig:
|
|
|
434
593
|
# spec_version is a version string
|
|
435
594
|
spec_version: str = DEFAULT_SPEC_VERSION
|
|
436
595
|
base_image: Optional[BaseImage] = None
|
|
596
|
+
docker_server: Optional[DockerServer] = None
|
|
437
597
|
model_cache: ModelCache = field(default_factory=ModelCache)
|
|
438
598
|
trt_llm: Optional[TRTLLMConfiguration] = None
|
|
599
|
+
build_commands: List[str] = field(default_factory=list)
|
|
600
|
+
use_local_chains_src: bool = False
|
|
439
601
|
|
|
440
602
|
@property
def canonical_python_version(self) -> str:
    """Return the dotted version string (e.g. "3.9") for `self.python_version`
    (e.g. "py39").

    Raises:
        KeyError: If `python_version` is not one of py38, py39, py310, py311.
    """
    dotted_by_tag = {"py311": "3.11", "py310": "3.10", "py39": "3.9", "py38": "3.8"}
    return dotted_by_tag[self.python_version]
|
|
607
|
+
|
|
608
|
+
@property
def parsed_trt_llm_build_configs(self) -> List[TrussTRTLLMBuildConfiguration]:
    """Return the TRT-LLM build configurations of this config.

    Empty when `trt_llm` is not set; otherwise the main build config,
    followed by the speculator's build config when a speculator with a build
    is present.
    """
    if not self.trt_llm:
        return []

    main_build = self.trt_llm.build
    speculator = main_build.speculator
    if speculator and speculator.build:
        return [main_build, speculator.build]
    return [main_build]
|
|
447
615
|
|
|
448
616
|
@staticmethod
|
|
449
617
|
def from_dict(d):
|
|
@@ -464,7 +632,7 @@ class TrussConfig:
|
|
|
464
632
|
requirements_file=d.get("requirements_file", None),
|
|
465
633
|
requirements=d.get("requirements", []),
|
|
466
634
|
system_packages=d.get("system_packages", []),
|
|
467
|
-
environment_variables=d.get("environment_variables", {}),
|
|
635
|
+
environment_variables=_handle_env_vars(d.get("environment_variables", {})),
|
|
468
636
|
resources=Resources.from_dict(d.get("resources", {})),
|
|
469
637
|
runtime=Runtime.from_dict(d.get("runtime", {})),
|
|
470
638
|
build=Build.from_dict(d.get("build", {})),
|
|
@@ -483,19 +651,49 @@ class TrussConfig:
|
|
|
483
651
|
d.get("external_data"), ExternalData.from_list
|
|
484
652
|
),
|
|
485
653
|
base_image=transform_optional(d.get("base_image"), BaseImage.from_dict),
|
|
654
|
+
docker_server=transform_optional(
|
|
655
|
+
d.get("docker_server"), DockerServer.from_dict
|
|
656
|
+
),
|
|
486
657
|
model_cache=transform_optional(
|
|
487
|
-
d.get("model_cache") or d.get("hf_cache") or [],
|
|
658
|
+
d.get("model_cache") or d.get("hf_cache") or [], # type: ignore
|
|
488
659
|
ModelCache.from_list,
|
|
489
660
|
),
|
|
490
661
|
trt_llm=transform_optional(
|
|
491
|
-
d.get("trt_llm"), lambda x: TRTLLMConfiguration(**x)
|
|
662
|
+
d.get("trt_llm"), lambda x: (TRTLLMConfiguration(**x))
|
|
492
663
|
),
|
|
664
|
+
build_commands=d.get("build_commands", []),
|
|
665
|
+
use_local_chains_src=d.get("use_local_chains_src", False),
|
|
493
666
|
)
|
|
494
667
|
config.validate()
|
|
495
668
|
return config
|
|
496
669
|
|
|
670
|
+
def load_requirements_from_file(self, truss_dir: Path) -> List[str]:
    """Read the requirements listed in `self.requirements_file`.

    Args:
        truss_dir: Directory that `requirements_file` is resolved against.

    Returns:
        The truthy results of `parse_requirement_string` for each line of the
        file, in file order; an empty list when no `requirements_file` is
        configured.

    Raises:
        Exception: Any error raised while reading or parsing the file is
            logged and re-raised.
    """
    if not self.requirements_file:
        return []

    requirements_path = truss_dir / self.requirements_file
    try:
        parsed_requirements = []
        with open(requirements_path) as requirements_fp:
            for raw_line in requirements_fp:
                requirement = parse_requirement_string(raw_line)
                if requirement:
                    parsed_requirements.append(requirement)
        return parsed_requirements
    except Exception:
        logger.exception(
            f"failed to read requirements file: {self.requirements_file}"
        )
        raise
|
|
687
|
+
|
|
688
|
+
@staticmethod
def load_requirements_file_from_filepath(yaml_path: Path) -> List[str]:
    """Load the config at `yaml_path` and return the requirements from its
    requirements file, resolved relative to the config file's directory."""
    loaded_config = TrussConfig.from_yaml(yaml_path)
    truss_dir = yaml_path.parent
    return loaded_config.load_requirements_from_file(truss_dir)
|
|
692
|
+
|
|
497
693
|
@staticmethod
|
|
498
694
|
def from_yaml(yaml_path: Path):
|
|
695
|
+
if not os.path.isfile(yaml_path):
|
|
696
|
+
raise ValueError(f"Expected a truss configuration file at {yaml_path}")
|
|
499
697
|
with yaml_path.open() as yaml_file:
|
|
500
698
|
raw_data = yaml.safe_load(yaml_file) or {}
|
|
501
699
|
if "hf_cache" in raw_data:
|
|
@@ -516,10 +714,68 @@ class TrussConfig:
|
|
|
516
714
|
def clone(self):
    """Return a new config built by round-tripping this one through
    `to_dict` and `TrussConfig.from_dict`."""
    serialized = self.to_dict()
    return TrussConfig.from_dict(serialized)
|
|
518
716
|
|
|
717
|
+
def _validate_trt_llm_config(self) -> None:
|
|
718
|
+
if self.trt_llm:
|
|
719
|
+
if (
|
|
720
|
+
self.trt_llm.build.quantization_type
|
|
721
|
+
is TrussTRTLLMQuantizationType.WEIGHTS_ONLY_INT8
|
|
722
|
+
and self.resources.accelerator.accelerator is Accelerator.A100
|
|
723
|
+
):
|
|
724
|
+
raise ValueError(
|
|
725
|
+
"Weight only int8 quantization on A100 accelerators is not currently supported"
|
|
726
|
+
)
|
|
727
|
+
elif self.trt_llm.build.quantization_type in [
|
|
728
|
+
TrussTRTLLMQuantizationType.FP8,
|
|
729
|
+
TrussTRTLLMQuantizationType.FP8_KV,
|
|
730
|
+
] and self.resources.accelerator.accelerator not in [
|
|
731
|
+
Accelerator.H100,
|
|
732
|
+
Accelerator.H100_40GB,
|
|
733
|
+
Accelerator.L4,
|
|
734
|
+
]:
|
|
735
|
+
raise ValueError(
|
|
736
|
+
"FP8 quantization is only supported on L4 and H100 accelerators"
|
|
737
|
+
)
|
|
738
|
+
tensor_parallel_count = self.trt_llm.build.tensor_parallel_count
|
|
739
|
+
|
|
740
|
+
if tensor_parallel_count != self.resources.accelerator.count:
|
|
741
|
+
raise ValueError(
|
|
742
|
+
"Tensor parallelism and GPU count must be the same for TRT-LLM"
|
|
743
|
+
)
|
|
744
|
+
|
|
519
745
|
def validate(self):
    """Validate this config.

    Checks, in order: the python version, that `secrets` is a dict, every
    secret name, that `requirements` and `requirements_file` are not both
    set, and finally the TRT-LLM settings.

    Raises:
        ValueError: When `python_version` is not in `VALID_PYTHON_VERSIONS`,
            `secrets` is not a dict, or both `requirements` and
            `requirements_file` are specified. The secret-name and TRT-LLM
            checks are delegated and may raise their own errors.
    """
    if self.python_version not in VALID_PYTHON_VERSIONS:
        raise ValueError(
            f"Please ensure that `python_version` is one of {VALID_PYTHON_VERSIONS}"
        )

    if not isinstance(self.secrets, dict):
        secrets_format_hint = (
            "Please ensure that `secrets` is a mapping of the form:\n"
            "```\n"
            "secrets:\n"
            ' secret1: "some default value"\n'
            ' secret2: "some other default value"\n'
            "```"
        )
        raise ValueError(secrets_format_hint)

    for secret_name in self.secrets:
        validate_secret_name(secret_name)

    both_requirement_sources_set = self.requirements and self.requirements_file
    if both_requirement_sources_set:
        raise ValueError(
            "Please ensure that only one of `requirements` and `requirements_file` is specified"
        )

    self._validate_trt_llm_config()
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def _handle_env_vars(env_vars: Dict[str, Any]) -> Dict[str, str]:
|
|
771
|
+
new_env_vars = {}
|
|
772
|
+
for k, v in env_vars.items():
|
|
773
|
+
if isinstance(v, bool):
|
|
774
|
+
new_env_vars[k] = str(v).lower()
|
|
775
|
+
else:
|
|
776
|
+
new_env_vars[k] = str(v)
|
|
777
|
+
return new_env_vars
|
|
778
|
+
|
|
523
779
|
|
|
524
780
|
DATACLASS_TO_REQ_KEYS_MAP = {
|
|
525
781
|
Resources: {"accelerator", "cpu", "memory", "use_gpu"},
|
|
@@ -535,6 +791,7 @@ DATACLASS_TO_REQ_KEYS_MAP = {
|
|
|
535
791
|
"resources",
|
|
536
792
|
"secrets",
|
|
537
793
|
"system_packages",
|
|
794
|
+
"build_commands",
|
|
538
795
|
},
|
|
539
796
|
BaseImage: {"image", "python_executable_path"},
|
|
540
797
|
}
|
|
@@ -581,9 +838,65 @@ def obj_to_dict(obj, verbose: bool = False):
|
|
|
581
838
|
)
|
|
582
839
|
elif isinstance(field_curr_value, TRTLLMConfiguration):
|
|
583
840
|
d["trt_llm"] = transform_optional(
|
|
584
|
-
field_curr_value, lambda data: data.
|
|
841
|
+
field_curr_value, lambda data: data.to_json_dict(verbose=verbose)
|
|
842
|
+
)
|
|
843
|
+
elif isinstance(field_curr_value, BaseImage):
|
|
844
|
+
d["base_image"] = transform_optional(
|
|
845
|
+
field_curr_value, lambda data: data.to_dict()
|
|
846
|
+
)
|
|
847
|
+
elif isinstance(field_curr_value, DockerServer):
|
|
848
|
+
d["docker_server"] = transform_optional(
|
|
849
|
+
field_curr_value, lambda data: data.to_dict()
|
|
850
|
+
)
|
|
851
|
+
elif isinstance(field_curr_value, DockerAuthSettings):
|
|
852
|
+
d["docker_auth"] = transform_optional(
|
|
853
|
+
field_curr_value, lambda data: data.to_dict()
|
|
854
|
+
)
|
|
855
|
+
elif isinstance(field_curr_value, HealthChecks):
|
|
856
|
+
d["health_checks"] = transform_optional(
|
|
857
|
+
field_curr_value, lambda data: data.to_dict()
|
|
585
858
|
)
|
|
586
859
|
else:
|
|
587
860
|
d[field_name] = field_curr_value
|
|
588
861
|
|
|
589
862
|
return d
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
# TODO(marius): consolidate this with config/validation:
|
|
866
|
+
def _infer_python_version() -> str:
|
|
867
|
+
return f"py{sys.version_info.major}{sys.version_info.minor}"
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
def map_local_to_supported_python_version() -> str:
    """Map the running interpreter's version to a truss supported python
    version, via `map_to_supported_python_version`."""
    local_version = _infer_python_version()
    return map_to_supported_python_version(local_version)
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def map_to_supported_python_version(python_version: str) -> str:
    """Map python version to truss supported python version.

    Versions greater than 3.11 are mapped to 3.11; versions 3.8 through 3.11
    are returned as they are; older versions are rejected.

    Args:
        python_version: in the form py[major_version][minor_version] e.g. py39,
            py310.

    Returns:
        The supported version, in the same py[major][minor] form.

    Raises:
        ValueError: If `python_version` is not of the expected form, or if it
            is older than 3.8.
        NotImplementedError: If the major version is not 3.
    """
    version_digits = python_version[2:]
    # Reject malformed input up front instead of failing inside `int()` with
    # an unrelated message, or silently accepting a wrong prefix.
    if (
        not python_version.startswith("py")
        or len(version_digits) < 2
        or not (version_digits.isascii() and version_digits.isdigit())
    ):
        raise ValueError(
            f"Invalid python version `{python_version}`, expected the form "
            "py[major_version][minor_version], e.g. py39 or py310."
        )

    python_major_version = int(version_digits[0])
    python_minor_version = int(version_digits[1:])

    if python_major_version != 3:
        raise NotImplementedError("Only python version 3 is supported")

    if python_minor_version > 11:
        logger.info(
            f"Mapping python version {python_major_version}.{python_minor_version}"
            " to 3.11, the highest version that Truss currently supports."
        )
        return "py311"

    if python_minor_version < 8:
        # No mapping happens here: versions this old are rejected, so the
        # message says so rather than claiming a mapping to 3.8.
        raise ValueError(
            f"Python version {python_major_version}.{python_minor_version}"
            " is not supported, 3.8 is the lowest version that Truss currently"
            " supports."
        )

    # Rebuilt from the parsed numbers so the result is always canonical.
    return f"py{python_major_version}{python_minor_version}"
|
|
@@ -3,10 +3,12 @@ from pathlib import Path
|
|
|
3
3
|
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import yaml
|
|
6
|
-
|
|
7
|
-
from truss.
|
|
8
|
-
from truss.
|
|
9
|
-
from truss.
|
|
6
|
+
|
|
7
|
+
from truss.base.constants import CONFIG_FILE
|
|
8
|
+
from truss.base.custom_types import Example, ModelFrameworkType
|
|
9
|
+
from truss.base.errors import ValidationError
|
|
10
|
+
from truss.base.truss_config import ExternalData, ModelServer, TrussConfig
|
|
11
|
+
from truss.base.validation import validate_memory_spec
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
class TrussSpec:
|
|
@@ -34,6 +36,10 @@ class TrussSpec:
|
|
|
34
36
|
def external_data(self) -> Optional[ExternalData]:
|
|
35
37
|
return self._config.external_data
|
|
36
38
|
|
|
39
|
+
@property
def build_commands(self) -> List[str]:
    """The `build_commands` list of the underlying config."""
    config = self._config
    return config.build_commands
|
|
42
|
+
|
|
37
43
|
@property
def model_module_dir(self) -> Path:
    """The config's `model_module_dir`, joined onto the truss directory."""
    truss_dir = self._truss_dir
    return truss_dir / self._config.model_module_dir
|
|
@@ -90,6 +96,10 @@ class TrussSpec:
|
|
|
90
96
|
def memory(self) -> str:
|
|
91
97
|
return self._config.resources.memory
|
|
92
98
|
|
|
99
|
+
@property
def memory_in_bytes(self) -> int:
    """The result of passing the `memory` spec through `validate_memory_spec`."""
    memory_spec = self.memory
    return validate_memory_spec(memory_spec)
|
|
102
|
+
|
|
93
103
|
@property
def use_gpu(self) -> str:
    """The `use_gpu` setting from the config's resources."""
    # NOTE(review): annotated as `str`, but the config example elsewhere in
    # the package shows `use_gpu: true` — confirm the intended type.
    resources = self._config.resources
    return resources.use_gpu
|
|
@@ -180,8 +190,12 @@ class TrussSpec:
|
|
|
180
190
|
return self._config.base_image.image
|
|
181
191
|
|
|
182
192
|
@property
def python_executable_path(self) -> Optional[str]:
    """The base image's python executable path, or `None` when the config
    has no base image."""
    base_image = self._config.base_image
    if not base_image:
        return None
    return base_image.python_executable_path
|
|
185
199
|
|
|
186
200
|
@property
|
|
187
201
|
def apply_library_patches(self) -> bool:
|