truss 0.10.0rc1__py3-none-any.whl → 0.60.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss might be problematic. Click here for more details.
- truss/__init__.py +10 -3
- truss/api/__init__.py +123 -0
- truss/api/definitions.py +51 -0
- truss/base/constants.py +116 -0
- truss/base/custom_types.py +29 -0
- truss/{errors.py → base/errors.py} +4 -0
- truss/base/trt_llm_config.py +310 -0
- truss/{truss_config.py → base/truss_config.py} +344 -31
- truss/{truss_spec.py → base/truss_spec.py} +20 -6
- truss/{validation.py → base/validation.py} +60 -11
- truss/cli/cli.py +841 -88
- truss/{remote → cli}/remote_cli.py +2 -7
- truss/contexts/docker_build_setup.py +67 -0
- truss/contexts/image_builder/cache_warmer.py +2 -8
- truss/contexts/image_builder/image_builder.py +1 -1
- truss/contexts/image_builder/serving_image_builder.py +292 -46
- truss/contexts/image_builder/util.py +1 -3
- truss/contexts/local_loader/docker_build_emulator.py +58 -0
- truss/contexts/local_loader/load_model_local.py +2 -2
- truss/contexts/local_loader/truss_module_loader.py +1 -1
- truss/contexts/local_loader/utils.py +1 -1
- truss/local/local_config.py +2 -6
- truss/local/local_config_handler.py +20 -5
- truss/patch/__init__.py +1 -0
- truss/patch/hash.py +4 -70
- truss/patch/signature.py +4 -16
- truss/patch/truss_dir_patch_applier.py +3 -78
- truss/remote/baseten/api.py +308 -23
- truss/remote/baseten/auth.py +3 -3
- truss/remote/baseten/core.py +257 -50
- truss/remote/baseten/custom_types.py +44 -0
- truss/remote/baseten/error.py +4 -0
- truss/remote/baseten/remote.py +369 -118
- truss/remote/baseten/service.py +118 -11
- truss/remote/baseten/utils/status.py +29 -0
- truss/remote/baseten/utils/tar.py +34 -22
- truss/remote/baseten/utils/transfer.py +36 -23
- truss/remote/remote_factory.py +14 -5
- truss/remote/truss_remote.py +72 -45
- truss/templates/base.Dockerfile.jinja +18 -16
- truss/templates/cache.Dockerfile.jinja +3 -3
- truss/{server → templates/control}/control/application.py +14 -35
- truss/{server → templates/control}/control/endpoints.py +39 -9
- truss/{server/control/patch/types.py → templates/control/control/helpers/custom_types.py} +13 -52
- truss/{server → templates/control}/control/helpers/inference_server_controller.py +4 -8
- truss/{server → templates/control}/control/helpers/inference_server_process_controller.py +2 -4
- truss/{server → templates/control}/control/helpers/inference_server_starter.py +5 -10
- truss/{server/control → templates/control/control/helpers}/truss_patch/model_code_patch_applier.py +8 -6
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/model_container_patch_applier.py +18 -26
- truss/templates/control/control/helpers/truss_patch/requirement_name_identifier.py +66 -0
- truss/{server → templates/control}/control/server.py +11 -6
- truss/templates/control/requirements.txt +9 -0
- truss/templates/custom_python_dx/my_model.py +28 -0
- truss/templates/docker_server/proxy.conf.jinja +42 -0
- truss/templates/docker_server/supervisord.conf.jinja +27 -0
- truss/templates/docker_server_requirements.txt +1 -0
- truss/templates/server/common/errors.py +231 -0
- truss/{server → templates/server}/common/patches/whisper/patch.py +1 -0
- truss/{server/common/patches/__init__.py → templates/server/common/patches.py} +1 -3
- truss/{server → templates/server}/common/retry.py +1 -0
- truss/{server → templates/server}/common/schema.py +11 -9
- truss/templates/server/common/tracing.py +157 -0
- truss/templates/server/main.py +9 -0
- truss/templates/server/model_wrapper.py +961 -0
- truss/templates/server/requirements.txt +21 -0
- truss/templates/server/truss_server.py +447 -0
- truss/templates/server.Dockerfile.jinja +62 -14
- truss/templates/shared/dynamic_config_resolver.py +28 -0
- truss/templates/shared/lazy_data_resolver.py +164 -0
- truss/templates/shared/log_config.py +125 -0
- truss/{server → templates}/shared/secrets_resolver.py +1 -2
- truss/{server → templates}/shared/serialization.py +31 -9
- truss/{server → templates}/shared/util.py +3 -13
- truss/templates/trtllm-audio/model/model.py +49 -0
- truss/templates/trtllm-audio/packages/sigint_patch.py +14 -0
- truss/templates/trtllm-audio/packages/whisper_trt/__init__.py +215 -0
- truss/templates/trtllm-audio/packages/whisper_trt/assets.py +25 -0
- truss/templates/trtllm-audio/packages/whisper_trt/batching.py +52 -0
- truss/templates/trtllm-audio/packages/whisper_trt/custom_types.py +26 -0
- truss/templates/trtllm-audio/packages/whisper_trt/modeling.py +184 -0
- truss/templates/trtllm-audio/packages/whisper_trt/tokenizer.py +185 -0
- truss/templates/trtllm-audio/packages/whisper_trt/utils.py +245 -0
- truss/templates/trtllm-briton/src/extension.py +64 -0
- truss/tests/conftest.py +302 -94
- truss/tests/contexts/image_builder/test_serving_image_builder.py +74 -31
- truss/tests/contexts/local_loader/test_load_local.py +2 -2
- truss/tests/contexts/local_loader/test_truss_module_finder.py +1 -1
- truss/tests/patch/test_calc_patch.py +439 -127
- truss/tests/patch/test_dir_signature.py +3 -12
- truss/tests/patch/test_hash.py +1 -1
- truss/tests/patch/test_signature.py +1 -1
- truss/tests/patch/test_truss_dir_patch_applier.py +23 -11
- truss/tests/patch/test_types.py +2 -2
- truss/tests/remote/baseten/test_api.py +153 -58
- truss/tests/remote/baseten/test_auth.py +2 -1
- truss/tests/remote/baseten/test_core.py +160 -12
- truss/tests/remote/baseten/test_remote.py +489 -77
- truss/tests/remote/baseten/test_service.py +55 -0
- truss/tests/remote/test_remote_factory.py +16 -18
- truss/tests/remote/test_truss_remote.py +26 -17
- truss/tests/templates/control/control/helpers/test_context_managers.py +11 -0
- truss/tests/templates/control/control/helpers/test_model_container_patch_applier.py +184 -0
- truss/tests/templates/control/control/helpers/test_requirement_name_identifier.py +89 -0
- truss/tests/{server → templates/control}/control/test_server.py +79 -24
- truss/tests/{server → templates/control}/control/test_server_integration.py +24 -16
- truss/tests/templates/core/server/test_dynamic_config_resolver.py +108 -0
- truss/tests/templates/core/server/test_lazy_data_resolver.py +329 -0
- truss/tests/templates/core/server/test_lazy_data_resolver_v2.py +79 -0
- truss/tests/{server → templates}/core/server/test_secrets_resolver.py +1 -1
- truss/tests/{server → templates/server}/common/test_retry.py +3 -3
- truss/tests/templates/server/test_model_wrapper.py +248 -0
- truss/tests/{server → templates/server}/test_schema.py +3 -5
- truss/tests/{server/core/server/common → templates/server}/test_truss_server.py +8 -5
- truss/tests/test_build.py +9 -52
- truss/tests/test_config.py +336 -77
- truss/tests/test_context_builder_image.py +3 -11
- truss/tests/test_control_truss_patching.py +7 -12
- truss/tests/test_custom_server.py +38 -0
- truss/tests/test_data/context_builder_image_test/test.py +3 -0
- truss/tests/test_data/happy.ipynb +56 -0
- truss/tests/test_data/model_load_failure_test/config.yaml +2 -0
- truss/tests/test_data/model_load_failure_test/model/__init__.py +0 -0
- truss/tests/test_data/patch_ping_test_server/__init__.py +0 -0
- truss/{test_data → tests/test_data}/patch_ping_test_server/app.py +3 -9
- truss/{test_data → tests/test_data}/server.Dockerfile +20 -21
- truss/tests/test_data/server_conformance_test_truss/__init__.py +0 -0
- truss/tests/test_data/server_conformance_test_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/model/model.py +1 -3
- truss/tests/test_data/test_async_truss/__init__.py +0 -0
- truss/tests/test_data/test_async_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/config.yaml +16 -0
- truss/tests/test_data/test_basic_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/config.yaml +13 -0
- truss/tests/test_data/test_build_commands/model/__init__.py +0 -0
- truss/{test_data/test_streaming_async_generator_truss → tests/test_data/test_build_commands}/model/model.py +2 -3
- truss/tests/test_data/test_build_commands_failure/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/config.yaml +14 -0
- truss/tests/test_data/test_build_commands_failure/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/model/model.py +17 -0
- truss/tests/test_data/test_concurrency_truss/__init__.py +0 -0
- truss/tests/test_data/test_concurrency_truss/config.yaml +4 -0
- truss/tests/test_data/test_concurrency_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/config.yaml +20 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/Dockerfile +17 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/README.md +10 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/VERSION +1 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/app.py +19 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/build_upload_new_image.sh +6 -0
- truss/tests/test_data/test_openai/__init__.py +0 -0
- truss/{test_data/test_basic_truss → tests/test_data/test_openai}/config.yaml +1 -2
- truss/tests/test_data/test_openai/model/__init__.py +0 -0
- truss/tests/test_data/test_openai/model/model.py +15 -0
- truss/tests/test_data/test_pyantic_v1/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/model.py +28 -0
- truss/tests/test_data/test_pyantic_v1/requirements.txt +1 -0
- truss/tests/test_data/test_pyantic_v2/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/config.yaml +13 -0
- truss/tests/test_data/test_pyantic_v2/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/model/model.py +30 -0
- truss/tests/test_data/test_pyantic_v2/requirements.txt +1 -0
- truss/tests/test_data/test_requirements_file_truss/__init__.py +0 -0
- truss/tests/test_data/test_requirements_file_truss/config.yaml +13 -0
- truss/tests/test_data/test_requirements_file_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_requirements_file_truss/model/model.py +1 -0
- truss/tests/test_data/test_streaming_async_generator_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/model.py +7 -0
- truss/tests/test_data/test_streaming_read_timeout/__init__.py +0 -0
- truss/tests/test_data/test_streaming_read_timeout/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss_with_error/model/model.py +3 -11
- truss/tests/test_data/test_streaming_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/config.yaml +43 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/model.py +65 -0
- truss/tests/test_data/test_trt_llm_truss/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/config.yaml +15 -0
- truss/tests/test_data/test_trt_llm_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/model/model.py +15 -0
- truss/tests/test_data/test_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss/config.yaml +4 -0
- truss/tests/test_data/test_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss/model/dummy +0 -0
- truss/tests/test_data/test_truss/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss/packages/test_package/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/config.yaml +4 -0
- truss/tests/test_data/test_truss_with_error/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/model/model.py +8 -0
- truss/tests/test_data/test_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_docker.py +2 -1
- truss/tests/test_model_inference.py +1340 -292
- truss/tests/test_model_schema.py +33 -26
- truss/tests/test_testing_utilities_for_other_tests.py +50 -5
- truss/tests/test_truss_gatherer.py +3 -5
- truss/tests/test_truss_handle.py +62 -59
- truss/tests/test_util.py +2 -1
- truss/tests/test_validation.py +15 -13
- truss/tests/trt_llm/test_trt_llm_config.py +41 -0
- truss/tests/trt_llm/test_validation.py +91 -0
- truss/tests/util/test_config_checks.py +40 -0
- truss/tests/util/test_env_vars.py +14 -0
- truss/tests/util/test_path.py +10 -23
- truss/trt_llm/config_checks.py +43 -0
- truss/trt_llm/validation.py +42 -0
- truss/truss_handle/__init__.py +0 -0
- truss/truss_handle/build.py +122 -0
- truss/{decorators.py → truss_handle/decorators.py} +1 -1
- truss/truss_handle/patch/__init__.py +0 -0
- truss/{patch → truss_handle/patch}/calc_patch.py +146 -92
- truss/{types.py → truss_handle/patch/custom_types.py} +35 -27
- truss/{patch → truss_handle/patch}/dir_signature.py +1 -1
- truss/truss_handle/patch/hash.py +71 -0
- truss/{patch → truss_handle/patch}/local_truss_patch_applier.py +6 -4
- truss/truss_handle/patch/signature.py +22 -0
- truss/truss_handle/patch/truss_dir_patch_applier.py +87 -0
- truss/{readme_generator.py → truss_handle/readme_generator.py} +3 -2
- truss/{truss_gatherer.py → truss_handle/truss_gatherer.py} +3 -2
- truss/{truss_handle.py → truss_handle/truss_handle.py} +174 -78
- truss/util/.truss_ignore +3 -0
- truss/{docker.py → util/docker.py} +6 -2
- truss/util/download.py +6 -15
- truss/util/env_vars.py +41 -0
- truss/util/log_utils.py +52 -0
- truss/util/path.py +20 -20
- truss/util/requirements.py +11 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/METADATA +18 -16
- truss-0.60.0.dist-info/RECORD +324 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/WHEEL +1 -1
- truss-0.60.0.dist-info/entry_points.txt +4 -0
- truss_chains/__init__.py +71 -0
- truss_chains/definitions.py +756 -0
- truss_chains/deployment/__init__.py +0 -0
- truss_chains/deployment/code_gen.py +816 -0
- truss_chains/deployment/deployment_client.py +871 -0
- truss_chains/framework.py +1480 -0
- truss_chains/public_api.py +231 -0
- truss_chains/py.typed +0 -0
- truss_chains/pydantic_numpy.py +131 -0
- truss_chains/reference_code/reference_chainlet.py +34 -0
- truss_chains/reference_code/reference_model.py +10 -0
- truss_chains/remote_chainlet/__init__.py +0 -0
- truss_chains/remote_chainlet/model_skeleton.py +60 -0
- truss_chains/remote_chainlet/stub.py +380 -0
- truss_chains/remote_chainlet/utils.py +332 -0
- truss_chains/streaming.py +378 -0
- truss_chains/utils.py +178 -0
- CODE_OF_CONDUCT.md +0 -131
- CONTRIBUTING.md +0 -48
- README.md +0 -137
- context_builder.Dockerfile +0 -24
- truss/blob/blob_backend.py +0 -10
- truss/blob/blob_backend_registry.py +0 -23
- truss/blob/http_public_blob_backend.py +0 -23
- truss/build/__init__.py +0 -2
- truss/build/build.py +0 -143
- truss/build/configure.py +0 -63
- truss/cli/__init__.py +0 -2
- truss/cli/console.py +0 -5
- truss/cli/create.py +0 -5
- truss/config/trt_llm.py +0 -81
- truss/constants.py +0 -61
- truss/model_inference.py +0 -123
- truss/patch/types.py +0 -30
- truss/pytest.ini +0 -7
- truss/server/common/errors.py +0 -100
- truss/server/common/termination_handler_middleware.py +0 -64
- truss/server/common/truss_server.py +0 -389
- truss/server/control/patch/model_code_patch_applier.py +0 -46
- truss/server/control/patch/requirement_name_identifier.py +0 -17
- truss/server/inference_server.py +0 -29
- truss/server/model_wrapper.py +0 -434
- truss/server/shared/logging.py +0 -81
- truss/templates/trtllm/model/model.py +0 -97
- truss/templates/trtllm/packages/build_engine_utils.py +0 -34
- truss/templates/trtllm/packages/constants.py +0 -11
- truss/templates/trtllm/packages/schema.py +0 -216
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/ensemble/config.pbtxt +0 -246
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/1/model.py +0 -181
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/config.pbtxt +0 -64
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/1/model.py +0 -260
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/config.pbtxt +0 -99
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/tensorrt_llm/config.pbtxt +0 -208
- truss/templates/trtllm/packages/triton_client.py +0 -150
- truss/templates/trtllm/packages/utils.py +0 -43
- truss/test_data/context_builder_image_test/test.py +0 -4
- truss/test_data/happy.ipynb +0 -54
- truss/test_data/model_load_failure_test/config.yaml +0 -2
- truss/test_data/test_concurrency_truss/config.yaml +0 -2
- truss/test_data/test_streaming_async_generator_truss/config.yaml +0 -2
- truss/test_data/test_streaming_truss/config.yaml +0 -3
- truss/test_data/test_truss/config.yaml +0 -2
- truss/tests/server/common/test_termination_handler_middleware.py +0 -93
- truss/tests/server/control/test_model_container_patch_applier.py +0 -203
- truss/tests/server/core/server/common/test_util.py +0 -19
- truss/tests/server/test_model_wrapper.py +0 -87
- truss/util/data_structures.py +0 -16
- truss-0.10.0rc1.dist-info/RECORD +0 -216
- truss-0.10.0rc1.dist-info/entry_points.txt +0 -3
- truss/{server/shared → base}/__init__.py +0 -0
- truss/{server → templates/control}/control/helpers/context_managers.py +0 -0
- truss/{server/control → templates/control/control/helpers}/errors.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/__init__.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/system_packages.py +0 -0
- truss/{test_data/annotated_types_truss/model → templates/server}/__init__.py +0 -0
- truss/{server → templates/server}/common/__init__.py +0 -0
- truss/{test_data/gcs_fix/model → templates/shared}/__init__.py +0 -0
- truss/templates/{trtllm → trtllm-briton}/README.md +0 -0
- truss/{test_data/server_conformance_test_truss/model → tests/test_data}/__init__.py +0 -0
- truss/{test_data/test_basic_truss/model → tests/test_data/annotated_types_truss}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/config.yaml +0 -0
- truss/{test_data/test_requirements_file_truss → tests/test_data/annotated_types_truss}/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/auto-mpg.data +0 -0
- truss/{test_data → tests/test_data}/context_builder_image_test/Dockerfile +0 -0
- truss/{test_data/test_truss/model → tests/test_data/context_builder_image_test}/__init__.py +0 -0
- truss/{test_data/test_truss_server_caching_truss/model → tests/test_data/gcs_fix}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/config.yaml +0 -0
- truss/tests/{local → test_data/gcs_fix/model}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/model/model.py +0 -0
- truss/{test_data/test_truss/model/dummy → tests/test_data/model_load_failure_test/__init__.py} +0 -0
- truss/{test_data → tests/test_data}/model_load_failure_test/model/model.py +0 -0
- truss/{test_data → tests/test_data}/pima-indians-diabetes.csv +0 -0
- truss/{test_data → tests/test_data}/readme_int_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_no_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_str_example.md +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/model/model.py +3 -3
- /truss/{test_data → tests/test_data}/test_basic_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_concurrency_truss/model/model.py +0 -0
- /truss/{test_data/test_requirements_file_truss → tests/test_data/test_pyantic_v1}/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_requirements_file_truss/requirements.txt +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss_with_error/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/examples.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss/packages/test_package/test.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/model/model.py +0 -0
- /truss/{patch → truss_handle/patch}/constants.py +0 -0
- /truss/{notebook.py → util/notebook.py} +0 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
-i https://pypi.org/simple
|
|
2
|
+
|
|
3
|
+
aiocontextvars==0.2.2
|
|
4
|
+
argparse==1.4.0
|
|
5
|
+
cython==3.0.5
|
|
6
|
+
fastapi==0.114.1
|
|
7
|
+
joblib==1.2.0
|
|
8
|
+
loguru==0.7.2
|
|
9
|
+
msgpack-numpy==0.4.8
|
|
10
|
+
msgpack==1.1.0 # Numpy/msgpack versions are finicky (1.0.2 breaks), double check when changing.
|
|
11
|
+
numpy>=1.23.5
|
|
12
|
+
opentelemetry-api>=1.25.0
|
|
13
|
+
opentelemetry-sdk>=1.25.0
|
|
14
|
+
opentelemetry-exporter-otlp>=1.25.0
|
|
15
|
+
psutil==5.9.4
|
|
16
|
+
python-json-logger==2.0.2
|
|
17
|
+
pyyaml==6.0.0
|
|
18
|
+
requests==2.31.0
|
|
19
|
+
uvicorn==0.24.0
|
|
20
|
+
uvloop==0.19.0
|
|
21
|
+
aiofiles==24.1.0
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import logging.config
|
|
5
|
+
import os
|
|
6
|
+
import signal
|
|
7
|
+
import sys
|
|
8
|
+
from http import HTTPStatus
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Optional, Union
|
|
11
|
+
|
|
12
|
+
import pydantic
|
|
13
|
+
import uvicorn
|
|
14
|
+
import yaml
|
|
15
|
+
from common import errors, tracing
|
|
16
|
+
from common.schema import TrussSchema
|
|
17
|
+
from fastapi import Depends, FastAPI, HTTPException, Request
|
|
18
|
+
from fastapi.responses import ORJSONResponse, StreamingResponse
|
|
19
|
+
from fastapi.routing import APIRoute as FastAPIRoute
|
|
20
|
+
from model_wrapper import MODEL_BASENAME, MethodName, ModelWrapper
|
|
21
|
+
from opentelemetry import propagate as otel_propagate
|
|
22
|
+
from opentelemetry import trace
|
|
23
|
+
from opentelemetry.sdk import trace as sdk_trace
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
from shared import log_config, serialization
|
|
26
|
+
from shared.secrets_resolver import SecretsResolver
|
|
27
|
+
from starlette.requests import ClientDisconnect
|
|
28
|
+
from starlette.responses import Response
|
|
29
|
+
|
|
30
|
+
if sys.version_info >= (3, 9):
|
|
31
|
+
from typing import AsyncGenerator, Generator
|
|
32
|
+
else:
|
|
33
|
+
from typing_extensions import AsyncGenerator, Generator
|
|
34
|
+
|
|
35
|
+
# Leading numeric component of the installed pydantic version string.
PYDANTIC_MAJOR_VERSION = int(pydantic.VERSION.split(".", 1)[0])

# [IMPORTANT] A lot of things depend on this currently, change with extreme care.
TIMEOUT_GRACEFUL_SHUTDOWN = 120
# Marker file touched by `BasetenEndpoints.check_healthy` when the model failed to load.
INFERENCE_SERVER_FAILED_FILE = Path("~/inference_server_crashed.txt").expanduser()
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from model_wrapper import InputType, MethodDescriptor, OutputType
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def parse_body(request: Request) -> bytes:
    """Read the complete request body asynchronously.

    Used as a FastAPI dependency (`Depends(parse_body)`) by the endpoint methods.

    Raises:
        HTTPException: with status 499 if the client disconnects while the
            body is being read.
    """
    try:
        raw_body = await request.body()
    except ClientDisconnect as exc:
        detail = "Client disconnected"
        logging.error(detail)
        raise HTTPException(status_code=499, detail=detail) from exc
    return raw_body
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class BasetenEndpoints:
|
|
58
|
+
"""The implementation of the model server endpoints.
|
|
59
|
+
|
|
60
|
+
Historically, we relied on the kserve server interface, which assumes that
|
|
61
|
+
multiple models are running behind a registry. As a result, some arguments to
|
|
62
|
+
functions will remain unused except for backwards compatibility checks.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
def __init__(self, model: ModelWrapper, tracer: sdk_trace.Tracer) -> None:
    """Keep references to the served model and the tracer used for request spans."""
    self._tracer = tracer
    self._model = model
|
|
68
|
+
|
|
69
|
+
def _safe_lookup_model(self, model_name: str = MODEL_BASENAME) -> ModelWrapper:
    """Return the served model when `model_name` matches its name.

    Raises:
        errors.ModelMissingError: if `model_name` differs from the served
            model's name.
    """
    served = self._model
    if model_name == served.name:
        return served
    raise errors.ModelMissingError(model_name)
|
|
73
|
+
|
|
74
|
+
@staticmethod
def check_healthy(model: ModelWrapper):
    """Verify that `model` is usable.

    If the model reports `load_failed`, the marker file
    `INFERENCE_SERVER_FAILED_FILE` is touched and the current process is
    killed with SIGKILL.

    Raises:
        errors.ModelNotReady: if the model is not ready.
    """
    if model.load_failed:
        # Leave the marker file behind first, then hard-kill this process.
        INFERENCE_SERVER_FAILED_FILE.touch()
        own_pid = os.getpid()
        os.kill(own_pid, signal.SIGKILL)

    if model.ready:
        return
    raise errors.ModelNotReady(model.name)
|
|
82
|
+
|
|
83
|
+
async def model_ready(self, model_name: str) -> dict:
    """Readiness check for the model called `model_name`.

    Awaits `model.is_healthy()`. A `None` result defers to `check_healthy`;
    a falsy result raises `errors.ModelNotReady`.

    Returns:
        An empty dict when the model is considered ready.
    """
    wrapper: ModelWrapper = self._safe_lookup_model(model_name)
    health = await wrapper.is_healthy()
    if health is None:
        # No explicit verdict: fall back to the load_failed / ready flags.
        self.check_healthy(wrapper)
        return {}
    if health:
        return {}
    raise errors.ModelNotReady(wrapper.name)
|
|
92
|
+
|
|
93
|
+
async def model_loaded(self, model_name: str) -> dict:
    """Look up `model_name`, run `check_healthy` on it and return an empty dict."""
    wrapper = self._safe_lookup_model(model_name)
    self.check_healthy(wrapper)
    return {}
|
|
97
|
+
|
|
98
|
+
async def invocations_ready(self) -> Dict[str, Union[str, bool]]:
    """
    Sagemaker-hosting compatible 'ping' endpoint.

    Raises `errors.ModelMissingError` when no model is set, otherwise runs
    `check_healthy` on it and returns an empty dict.
    """
    served = self._model
    if served is None:
        raise errors.ModelMissingError("model")
    self.check_healthy(served)
    return {}
|
|
107
|
+
|
|
108
|
+
async def invocations(
    self, request: Request, body_raw: bytes = Depends(parse_body)
) -> Response:
    """
    Sagemaker-hosting compatible 'invocations' endpoint.

    Delegates to `predict` using the served model's own name.
    """
    served_name = self._model.name
    response = await self.predict(served_name, request, body_raw)
    return response
|
|
115
|
+
|
|
116
|
+
async def _parse_body(
    self,
    request: Request,
    body_raw: bytes,
    truss_schema: Optional[TrussSchema],
    span: trace.Span,
) -> "InputType":
    """Turn the raw request body into model inputs.

    Three paths, each recorded as an event on `span`:
      * binary request: msgpack-deserialize, then validate against the schema's
        input type if a schema is given;
      * non-binary request with schema: parse the raw body with the schema's
        input type;
      * non-binary request without schema: plain JSON decoding.

    Raises:
        errors.InputParsingError: on pydantic validation failure or invalid JSON.
    """
    if self.is_binary(request):
        with tracing.section_as_event(span, "binary-deserialize"):
            payload = serialization.truss_msgpack_deserialize(body_raw)
        if not truss_schema:
            return payload
        try:
            with tracing.section_as_event(span, "parse-pydantic"):
                validated = truss_schema.input_type.parse_obj(payload)
        except pydantic.ValidationError as exc:
            raise errors.InputParsingError(
                errors.format_pydantic_validation_error(exc)
            ) from exc
        return validated

    if truss_schema:
        try:
            with tracing.section_as_event(span, "parse-pydantic"):
                validated = truss_schema.input_type.parse_raw(body_raw)
        except pydantic.ValidationError as exc:
            raise errors.InputParsingError(
                errors.format_pydantic_validation_error(exc)
            ) from exc
        return validated

    try:
        with tracing.section_as_event(span, "json-deserialize"):
            decoded = json.loads(body_raw)
    except json.JSONDecodeError as exc:
        raise errors.InputParsingError(
            f"Invalid JSON payload: {str(exc)}"
        ) from exc
    return decoded
|
|
153
|
+
|
|
154
|
+
async def _execute_request(
    self,
    model: ModelWrapper,
    method: Callable[["InputType", Request], Awaitable["OutputType"]],
    method_name: MethodName,
    request: Request,
    body_raw: bytes,
) -> Response:
    """
    Run one predictive endpoint (`method`) against `model` and build the response.

    Steps: bail out if the client already disconnected, run `check_healthy`,
    open the top-level span, parse the inputs (unless the model descriptor
    sets `skip_input_parsing`), await `method`, then shape the result:
    generators become a `StreamingResponse`, `Response` objects are returned
    as-is (error headers are added for status codes >= 300), and anything else
    goes through `_serialize_result`.

    Raises:
        ClientDisconnect: if the client is already gone before processing starts.
    """
    if await request.is_disconnected():
        notice = f"Client disconnected. Skipping `{method_name}`."
        logging.info(notice)
        raise ClientDisconnect(notice)

    self.check_healthy(model)
    propagated_ctx = otel_propagate.extract(request.headers) or None
    span_name = f"{method_name}-endpoint"
    # This is the top-level span in the truss-server, so we set the context here.
    # Nested spans "inherit" context automatically.
    with self._tracer.start_as_current_span(
        span_name, context=propagated_ctx
    ) as span:
        descriptor = model.model_descriptor
        inputs: Optional["InputType"] = None
        if not descriptor.skip_input_parsing:
            inputs = await self._parse_body(
                request, body_raw, descriptor.truss_schema, span
            )
        with tracing.section_as_event(span, "model-call"):
            outcome: "OutputType" = await method(inputs, request)

        # A generator coming back from the model means the output is streamed.
        if isinstance(outcome, (AsyncGenerator, Generator)):
            # media_type in StreamingResponse sets the Content-Type header
            return StreamingResponse(outcome, media_type="application/octet-stream")
        if isinstance(outcome, Response):
            if outcome.status_code >= HTTPStatus.MULTIPLE_CHOICES.value:
                errors.add_error_headers_to_user_response(outcome)
            return outcome
        return self._serialize_result(outcome, self.is_binary(request), span)
|
|
197
|
+
|
|
198
|
+
async def chat_completions(
    self, request: Request, body_raw: bytes = Depends(parse_body)
) -> Response:
    """Serve the chat-completions endpoint.

    Responds with HTTP 404 (via `_raise_if_not_supported`) when the model
    descriptor has no `chat_completions` entry.
    """
    served = self._safe_lookup_model()
    endpoint = MethodName.CHAT_COMPLETIONS
    self._raise_if_not_supported(endpoint, served.model_descriptor.chat_completions)

    response = await self._execute_request(
        model=served,
        method=served.chat_completions,
        method_name=endpoint,
        request=request,
        body_raw=body_raw,
    )
    return response
|
|
213
|
+
|
|
214
|
+
def _raise_if_not_supported(
    self, method_name: MethodName, descriptor: Optional["MethodDescriptor"]
):
    """Abort the request with a 404 when `descriptor` is missing.

    Args:
        method_name: Name of the method being requested; interpolated into the
            error detail.
        descriptor: The model's descriptor for that method, or a falsy value
            when the model does not provide it.

    Raises:
        HTTPException: With status 404 when `descriptor` is falsy.
    """
    if descriptor:
        # Method is available; nothing to do.
        return
    raise HTTPException(status_code=404, detail=f"{method_name} not supported.")
|
|
219
|
+
|
|
220
|
+
async def completions(
    self, request: Request, body_raw: bytes = Depends(parse_body)
) -> Response:
    """Handle a completions request for the looked-up model.

    Args:
        request: The incoming HTTP request.
        body_raw: Raw request body, injected through the `parse_body` dependency.

    Returns:
        Whatever `_execute_request` produces for the model's `completions`
        method.

    Raises:
        HTTPException: 404 (raised by `_raise_if_not_supported`) when the model
            descriptor has no `completions` entry.
    """
    wrapper = self._safe_lookup_model()
    descriptor = wrapper.model_descriptor.completions
    # Reject early when the model does not expose this method at all.
    self._raise_if_not_supported(MethodName.COMPLETIONS, descriptor)

    response = await self._execute_request(
        model=wrapper,
        method=wrapper.completions,
        method_name=MethodName.COMPLETIONS,
        request=request,
        body_raw=body_raw,
    )
    return response
|
|
235
|
+
|
|
236
|
+
async def predict(
    self, model_name: str, request: Request, body_raw: bytes = Depends(parse_body)
) -> Response:
    """Run a predict request against the model registered under `model_name`.

    Args:
        model_name: Name used to look up the model via `_safe_lookup_model`.
        request: The incoming HTTP request.
        body_raw: Raw request body, injected through the `parse_body` dependency.

    Returns:
        Whatever `_execute_request` produces for the predict call.
    """
    wrapper = self._safe_lookup_model(model_name)

    response = await self._execute_request(
        model=wrapper,
        # The wrapper itself is the callable: `__call__` is overwritten on
        # ModelWrapper.
        method=wrapper,
        method_name=MethodName.PREDICT,
        request=request,
        body_raw=body_raw,
    )
    return response
|
|
248
|
+
|
|
249
|
+
def _serialize_result(
    self, result: "OutputType", is_binary: bool, span: trace.Span
) -> Response:
    """Turn a model result into an HTTP response body.

    Args:
        result: The value returned by the model method.
        is_binary: When true the result is emitted as
            `application/octet-stream`; otherwise as `application/json`.
        span: Tracing span on which the serialization sections are recorded as
            events.

    Returns:
        A `Response` carrying the serialized payload and a matching
        `Content-Type` header.
    """
    if not is_binary:
        with tracing.section_as_event(span, "json-serialize"):
            if isinstance(result, BaseModel):
                # Note: chains has a pydantic integration for numpy arrays,
                # `NumpyArrayField`. `result.dict()` passes the array object
                # through, and that cannot be JSON serialized.
                # In pydantic v2 `result.model_dump(mode="json")` could be used.
                # For backwards compatibility the JSON string is dumped directly.
                content = result.json()
            else:
                content = json.dumps(result, cls=serialization.DeepNumpyEncoder)

        return Response(
            content=content, headers={"Content-Type": "application/json"}
        )

    payload = result
    if isinstance(payload, BaseModel):
        with tracing.section_as_event(span, "binary-dump"):
            payload = (
                payload.model_dump(mode="python")
                if PYDANTIC_MAJOR_VERSION > 1
                else payload.dict()
            )

    # If the result is not already serialized (bytes) and not a pydantic model,
    # it must be something `truss_msgpack_serialize` can handle (some dict /
    # nested structure).
    if not isinstance(payload, bytes):
        with tracing.section_as_event(span, "binary-serialize"):
            payload = serialization.truss_msgpack_serialize(payload)

    return Response(
        content=payload, headers={"Content-Type": "application/octet-stream"}
    )
|
|
283
|
+
|
|
284
|
+
async def schema(self, model_name: str) -> Dict:
    """Return the serialized truss schema of the model named `model_name`.

    Args:
        model_name: Name used to look up the model via `_safe_lookup_model`.

    Returns:
        The result of `truss_schema.serialize()` for the model's descriptor.

    Raises:
        HTTPException: 404 when the model is ready but has no schema; 503 when
            there is no schema and the model is not ready yet.
    """
    wrapper: ModelWrapper = self._safe_lookup_model(model_name)
    truss_schema = wrapper.model_descriptor.truss_schema

    if truss_schema is not None:
        return truss_schema.serialize()

    if not wrapper.ready:
        # No schema yet, and the model is still not ready: ask to retry.
        raise HTTPException(
            status_code=503,
            detail="Schema not available, please try again later.",
        )

    # The model is ready and there is no TrussSchema: report a 404.
    raise HTTPException(status_code=404, detail="No schema found")
|
|
297
|
+
|
|
298
|
+
@staticmethod
def is_binary(request: Request):
    """Tell whether the request declares a binary body.

    Returns True only when the request has a `Content-Type` header whose value
    is exactly `application/octet-stream`; a missing header yields False.
    """
    return request.headers.get("Content-Type") == "application/octet-stream"
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class TrussServer:
    """This wrapper class manages creation and cleanup of uvicorn server processes
    running the FastAPI inference server app.

    TrussServer runs as a main process managing UvicornCustomServer subprocesses that
    in turn may manage their own worker processes. Notably, this main process is kept
    alive when running `servers_task()` because of the child uvicorn server processes'
    main loop.

    NOTE(review): `start()` below runs a single in-process `uvicorn.Server` with
    `workers=1` through `asyncio.run`; the subprocess description above may be
    outdated - confirm.
    """

    _server: Optional[uvicorn.Server]

    def __init__(self, http_port: int, config_or_path: Union[str, Path, Dict]):
        """Load the config and build the model wrapper and endpoints.

        Args:
            http_port: Port the uvicorn server binds to in `start()`.
            config_or_path: Either an already parsed config dict, or a path to
                a YAML config file that is loaded with `yaml.safe_load`.
        """
        # This is run before uvicorn is up. Need explicit logging config here.
        logging.config.dictConfig(log_config.make_log_config("INFO"))

        if isinstance(config_or_path, (str, Path)):
            with open(config_or_path, encoding="utf-8") as config_file:
                config = yaml.safe_load(config_file)
        else:
            config = config_or_path

        secrets = SecretsResolver.get_secrets(config)
        tracer = tracing.get_truss_tracer(secrets, config)
        self._http_port = http_port
        self._config = config
        self._model = ModelWrapper(self._config, tracer)
        self._endpoints = BasetenEndpoints(self._model, tracer)
        self._server = None
        # Strong reference to the load-failure watcher created in `on_startup`.
        self._load_watch_task = None

    def cleanup(self):
        """Remove the inference-server failure marker file if it exists."""
        if INFERENCE_SERVER_FAILED_FILE.exists():
            INFERENCE_SERVER_FAILED_FILE.unlink()

    def on_startup(self):
        """
        This method will be started inside the main process, so here is where
        we want to setup our logging and model.
        """
        self.cleanup()
        self._model.start_load_thread()
        # The event loop only keeps weak references to tasks. Hold on to the
        # task so the watcher cannot be garbage-collected before it finishes.
        self._load_watch_task = asyncio.create_task(self._shutdown_if_load_fails())
        self._model.setup_polling_for_environment_updates()

    async def _shutdown_if_load_fails(self):
        """Poll the model until it is ready; ask uvicorn to exit if loading failed."""
        while not self._model.ready:
            await asyncio.sleep(0.5)
            if self._model.load_failed:
                assert self._server is not None
                logging.info("Trying shut down after failed model load.")
                self._server.should_exit = True
                return

    def create_application(self):
        """Build the FastAPI app with all inference routes and error handlers."""
        app = FastAPI(
            title="Baseten Inference Server",
            docs_url=None,
            redoc_url=None,
            default_response_class=ORJSONResponse,
            on_startup=[self.on_startup],
            routes=[
                # liveness endpoint
                FastAPIRoute(r"/", lambda: True),
                # readiness endpoint
                FastAPIRoute(
                    r"/v1/models/{model_name}", self._endpoints.model_ready, tags=["V1"]
                ),
                # loaded endpoint
                FastAPIRoute(
                    r"/v1/models/{model_name}/loaded",
                    self._endpoints.model_loaded,
                    tags=["V1"],
                ),
                FastAPIRoute(
                    r"/v1/models/{model_name}/schema",
                    self._endpoints.schema,
                    methods=["GET"],
                    tags=["V1"],
                ),
                FastAPIRoute(
                    r"/v1/models/{model_name}:predict",
                    self._endpoints.predict,
                    methods=["POST"],
                    tags=["V1"],
                ),
                FastAPIRoute(
                    r"/v1/models/{model_name}:predict_binary",
                    self._endpoints.predict,
                    methods=["POST"],
                    tags=["V1"],
                ),
                # OpenAI Spec
                FastAPIRoute(
                    r"/v1/chat/completions",
                    self._endpoints.chat_completions,
                    methods=["POST"],
                    tags=["V1"],
                ),
                FastAPIRoute(
                    r"/v1/completions",
                    self._endpoints.completions,
                    methods=["POST"],
                    tags=["V1"],
                ),
                # Endpoint aliases for Sagemaker hosting
                FastAPIRoute(r"/ping", self._endpoints.invocations_ready),
                FastAPIRoute(
                    r"/invocations", self._endpoints.invocations, methods=["POST"]
                ),
            ],
            exception_handlers={
                exc: errors.exception_handler for exc in errors.HANDLED_EXCEPTIONS
            },
        )
        # Above `exception_handlers` only triggers on exact exception classes.
        # This here is a fallback to add our custom headers in all other cases.
        app.add_exception_handler(Exception, errors.exception_handler)

        return app

    def start(self):
        """Configure uvicorn for the app and serve until the server exits."""
        log_level = (
            "DEBUG"
            if self._config["runtime"].get("enable_debug_logs", False)
            else "INFO"
        )
        cfg = uvicorn.Config(
            self.create_application(),
            # We hard-code the http parser as h11 (the default) in case the user has
            # httptools installed, which does not work with our requests & version
            # of uvicorn.
            http="h11",
            host="0.0.0.0",
            port=self._http_port,
            workers=1,
            timeout_graceful_shutdown=TIMEOUT_GRACEFUL_SHUTDOWN,
            log_config=log_config.make_log_config(log_level),
        )
        cfg.setup_event_loop()  # Call this so uvloop gets used
        server = uvicorn.Server(config=cfg)
        # Publish the server so `_shutdown_if_load_fails` can request an exit.
        self._server = server
        asyncio.run(server.serve())
|
|
@@ -7,8 +7,9 @@
|
|
|
7
7
|
{% block base_image_patch %}
|
|
8
8
|
# If user base image is supplied in config, apply build commands from truss base image
|
|
9
9
|
{% if config.base_image %}
|
|
10
|
-
|
|
11
|
-
ENV
|
|
10
|
+
{%- if not config.docker_server %}
|
|
11
|
+
ENV PYTHONUNBUFFERED="True"
|
|
12
|
+
ENV DEBIAN_FRONTEND="noninteractive"
|
|
12
13
|
|
|
13
14
|
RUN apt update && \
|
|
14
15
|
apt install -y bash \
|
|
@@ -21,7 +22,11 @@ RUN apt update && \
|
|
|
21
22
|
&& apt-get clean -y \
|
|
22
23
|
&& rm -rf /var/lib/apt/lists/*
|
|
23
24
|
|
|
24
|
-
|
|
25
|
+
COPY ./{{base_server_requirements_filename}} {{base_server_requirements_filename}}
|
|
26
|
+
RUN pip install -r {{base_server_requirements_filename}} --no-cache-dir && rm -rf /root/.cache/pip
|
|
27
|
+
{%- endif %}
|
|
28
|
+
|
|
29
|
+
{%- if config.live_reload and not config.docker_server%}
|
|
25
30
|
RUN $PYTHON_EXECUTABLE -m venv -h >/dev/null \
|
|
26
31
|
|| { pythonVersion=$(echo $($PYTHON_EXECUTABLE --version) | cut -d" " -f2 | cut -d"." -f1,2) \
|
|
27
32
|
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
|
@@ -39,6 +44,10 @@ RUN ln -sf {{config.base_image.python_executable_path}} /usr/local/bin/python
|
|
|
39
44
|
{% endblock %}
|
|
40
45
|
|
|
41
46
|
{% block install_requirements %}
|
|
47
|
+
{%- if should_install_server_requirements %}
|
|
48
|
+
COPY ./{{server_requirements_filename}} {{server_requirements_filename}}
|
|
49
|
+
RUN pip install -r {{server_requirements_filename}} --no-cache-dir && rm -rf /root/.cache/pip
|
|
50
|
+
{%- endif %}
|
|
42
51
|
{{ super() }}
|
|
43
52
|
{% endblock %}
|
|
44
53
|
|
|
@@ -55,26 +64,65 @@ RUN mkdir -p {{ dst.parent }}; curl -L "{{ url }}" -o {{ dst }}
|
|
|
55
64
|
{% endfor %}
|
|
56
65
|
{%- endif %}
|
|
57
66
|
|
|
67
|
+
|
|
68
|
+
{%- if build_commands %}
|
|
69
|
+
{% for command in build_commands %}
|
|
70
|
+
RUN {% for secret,path in config.build.secret_to_path_mapping.items() %} --mount=type=secret,id={{secret}},target={{path}}{% endfor %} {{ command }}
|
|
71
|
+
{% endfor %}
|
|
72
|
+
{%- endif %}
|
|
73
|
+
|
|
58
74
|
# Copy data before code for better caching
|
|
59
75
|
{%- if data_dir_exists %}
|
|
60
76
|
COPY ./{{config.data_dir}} /app/data
|
|
61
77
|
{%- endif %}
|
|
62
78
|
|
|
63
|
-
|
|
79
|
+
{%- if not config.docker_server %}
|
|
80
|
+
COPY ./server /app
|
|
81
|
+
{%- endif %}
|
|
82
|
+
|
|
83
|
+
{%- if use_local_chains_src %}
|
|
84
|
+
{# This path takes precedence over site-packages. #}
|
|
85
|
+
COPY ./truss_chains /app/truss_chains
|
|
86
|
+
{%- endif %}
|
|
87
|
+
|
|
64
88
|
COPY ./config.yaml /app/config.yaml
|
|
89
|
+
{%- if config.live_reload and not config.docker_server%}
|
|
90
|
+
COPY ./control /control
|
|
91
|
+
RUN python3 -m venv /control/.env \
|
|
92
|
+
&& /control/.env/bin/pip3 install -r /control/requirements.txt
|
|
93
|
+
{%- endif %}
|
|
94
|
+
{%- if model_dir_exists %}
|
|
95
|
+
COPY ./{{ config.model_module_dir }} /app/model
|
|
96
|
+
{%- endif %}
|
|
65
97
|
{% endblock %}
|
|
66
98
|
|
|
99
|
+
|
|
67
100
|
{% block run %}
|
|
68
|
-
{%- if config.
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
101
|
+
{%- if config.docker_server %}
|
|
102
|
+
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
|
103
|
+
curl nginx python3-pip && \
|
|
104
|
+
rm -rf /var/lib/apt/lists/*
|
|
105
|
+
COPY ./docker_server_requirements.txt /app/docker_server_requirements.txt
|
|
106
|
+
RUN pip install -r /app/docker_server_requirements.txt --no-cache-dir && rm -rf /root/.cache/pip
|
|
107
|
+
{% set proxy_config_path = "/etc/nginx/conf.d/proxy.conf" %}
|
|
108
|
+
{% set supervisor_config_path = "/etc/supervisor/supervisord.conf" %}
|
|
109
|
+
{% set supervisor_log_dir = "/var/log/supervisor" %}
|
|
110
|
+
{% set supervisor_server_url = "http://localhost:8080" %}
|
|
111
|
+
COPY ./proxy.conf {{ proxy_config_path }}
|
|
112
|
+
RUN mkdir -p {{ supervisor_log_dir }}
|
|
113
|
+
COPY supervisord.conf {{ supervisor_config_path }}
|
|
114
|
+
ENV SUPERVISOR_SERVER_URL="{{ supervisor_server_url }}"
|
|
115
|
+
ENV SERVER_START_CMD="supervisord -c {{ supervisor_config_path }}"
|
|
116
|
+
ENTRYPOINT ["supervisord", "-c", "{{ supervisor_config_path }}"]
|
|
117
|
+
{%- elif config.live_reload %}
|
|
118
|
+
ENV HASH_TRUSS="{{truss_hash}}"
|
|
119
|
+
ENV CONTROL_SERVER_PORT="8080"
|
|
120
|
+
ENV INFERENCE_SERVER_PORT="8090"
|
|
121
|
+
ENV SERVER_START_CMD="/control/.env/bin/python3 /control/control/server.py"
|
|
122
|
+
ENTRYPOINT ["/control/.env/bin/python3", "/control/control/server.py"]
|
|
75
123
|
{%- else %}
|
|
76
|
-
ENV INFERENCE_SERVER_PORT
|
|
77
|
-
ENV SERVER_START_CMD="{{(config.base_image.python_executable_path or "python3") ~ "
|
|
78
|
-
ENTRYPOINT ["{{config.base_image.python_executable_path or "python3"}}", "
|
|
124
|
+
ENV INFERENCE_SERVER_PORT="8080"
|
|
125
|
+
ENV SERVER_START_CMD="{{(config.base_image.python_executable_path or "python3") ~ " /app/main.py"}}"
|
|
126
|
+
ENTRYPOINT ["{{config.base_image.python_executable_path or "python3"}}", "/app/main.py"]
|
|
79
127
|
{%- endif %}
|
|
80
128
|
{% endblock %}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import aiofiles
|
|
5
|
+
|
|
6
|
+
# Base directory for dynamic config: the helpers in this module read the value
# of a key from the file `<this directory>/<key>`.
DYNAMIC_CONFIG_MOUNT_DIR = "/etc/b10_dynamic_config"
|
|
7
|
+
# Dynamic config key name. NOTE(review): presumably the key whose file holds
# the deployment environment; its consumers are not in this module - confirm.
ENVIRONMENT_DYNAMIC_CONFIG_KEY = "environment"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_dynamic_config_value_sync(key: str) -> Optional[str]:
    """Read the dynamic config value stored for `key`, synchronously.

    Args:
        key: Name of the file under `DYNAMIC_CONFIG_MOUNT_DIR` to read.

    Returns:
        The full text content of the file, or `None` when no such file exists.
    """
    dynamic_config_path = Path(DYNAMIC_CONFIG_MOUNT_DIR) / key
    # Open directly instead of checking `exists()` first: the file may vanish
    # between the check and the open, which would raise instead of yielding
    # `None`. `NotADirectoryError` is caught too because `exists()` also
    # reported such paths as missing.
    try:
        with dynamic_config_path.open() as dynamic_config_file:
            return dynamic_config_file.read()
    except (FileNotFoundError, NotADirectoryError):
        return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_dynamic_config_file_path(key: str):
    """Return the path of the file that stores the dynamic config value of `key`.

    The path is `DYNAMIC_CONFIG_MOUNT_DIR` joined with `key`; the file is not
    required to exist.
    """
    return Path(DYNAMIC_CONFIG_MOUNT_DIR, key)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def get_dynamic_config_value_async(key: str) -> Optional[str]:
    """Read the dynamic config value stored for `key` without blocking file reads.

    Args:
        key: Name of the dynamic config file, resolved through
            `get_dynamic_config_file_path`.

    Returns:
        The full text content of the file, or `None` when no such file exists.
    """
    dynamic_config_path = get_dynamic_config_file_path(key)
    # Open directly instead of checking `exists()` first: that avoids a
    # blocking stat call inside the coroutine, and closes the window in which
    # the file could vanish between the check and the open.
    # `NotADirectoryError` is caught too because `exists()` also reported such
    # paths as missing.
    try:
        async with aiofiles.open(dynamic_config_path, "r") as dynamic_config_file:
            return await dynamic_config_file.read()
    except (FileNotFoundError, NotADirectoryError):
        return None
|