truss 0.10.0rc1__py3-none-any.whl → 0.60.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss might be problematic. Click here for more details.
- truss/__init__.py +10 -3
- truss/api/__init__.py +123 -0
- truss/api/definitions.py +51 -0
- truss/base/constants.py +116 -0
- truss/base/custom_types.py +29 -0
- truss/{errors.py → base/errors.py} +4 -0
- truss/base/trt_llm_config.py +310 -0
- truss/{truss_config.py → base/truss_config.py} +344 -31
- truss/{truss_spec.py → base/truss_spec.py} +20 -6
- truss/{validation.py → base/validation.py} +60 -11
- truss/cli/cli.py +841 -88
- truss/{remote → cli}/remote_cli.py +2 -7
- truss/contexts/docker_build_setup.py +67 -0
- truss/contexts/image_builder/cache_warmer.py +2 -8
- truss/contexts/image_builder/image_builder.py +1 -1
- truss/contexts/image_builder/serving_image_builder.py +292 -46
- truss/contexts/image_builder/util.py +1 -3
- truss/contexts/local_loader/docker_build_emulator.py +58 -0
- truss/contexts/local_loader/load_model_local.py +2 -2
- truss/contexts/local_loader/truss_module_loader.py +1 -1
- truss/contexts/local_loader/utils.py +1 -1
- truss/local/local_config.py +2 -6
- truss/local/local_config_handler.py +20 -5
- truss/patch/__init__.py +1 -0
- truss/patch/hash.py +4 -70
- truss/patch/signature.py +4 -16
- truss/patch/truss_dir_patch_applier.py +3 -78
- truss/remote/baseten/api.py +308 -23
- truss/remote/baseten/auth.py +3 -3
- truss/remote/baseten/core.py +257 -50
- truss/remote/baseten/custom_types.py +44 -0
- truss/remote/baseten/error.py +4 -0
- truss/remote/baseten/remote.py +369 -118
- truss/remote/baseten/service.py +118 -11
- truss/remote/baseten/utils/status.py +29 -0
- truss/remote/baseten/utils/tar.py +34 -22
- truss/remote/baseten/utils/transfer.py +36 -23
- truss/remote/remote_factory.py +14 -5
- truss/remote/truss_remote.py +72 -45
- truss/templates/base.Dockerfile.jinja +18 -16
- truss/templates/cache.Dockerfile.jinja +3 -3
- truss/{server → templates/control}/control/application.py +14 -35
- truss/{server → templates/control}/control/endpoints.py +39 -9
- truss/{server/control/patch/types.py → templates/control/control/helpers/custom_types.py} +13 -52
- truss/{server → templates/control}/control/helpers/inference_server_controller.py +4 -8
- truss/{server → templates/control}/control/helpers/inference_server_process_controller.py +2 -4
- truss/{server → templates/control}/control/helpers/inference_server_starter.py +5 -10
- truss/{server/control → templates/control/control/helpers}/truss_patch/model_code_patch_applier.py +8 -6
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/model_container_patch_applier.py +18 -26
- truss/templates/control/control/helpers/truss_patch/requirement_name_identifier.py +66 -0
- truss/{server → templates/control}/control/server.py +11 -6
- truss/templates/control/requirements.txt +9 -0
- truss/templates/custom_python_dx/my_model.py +28 -0
- truss/templates/docker_server/proxy.conf.jinja +42 -0
- truss/templates/docker_server/supervisord.conf.jinja +27 -0
- truss/templates/docker_server_requirements.txt +1 -0
- truss/templates/server/common/errors.py +231 -0
- truss/{server → templates/server}/common/patches/whisper/patch.py +1 -0
- truss/{server/common/patches/__init__.py → templates/server/common/patches.py} +1 -3
- truss/{server → templates/server}/common/retry.py +1 -0
- truss/{server → templates/server}/common/schema.py +11 -9
- truss/templates/server/common/tracing.py +157 -0
- truss/templates/server/main.py +9 -0
- truss/templates/server/model_wrapper.py +961 -0
- truss/templates/server/requirements.txt +21 -0
- truss/templates/server/truss_server.py +447 -0
- truss/templates/server.Dockerfile.jinja +62 -14
- truss/templates/shared/dynamic_config_resolver.py +28 -0
- truss/templates/shared/lazy_data_resolver.py +164 -0
- truss/templates/shared/log_config.py +125 -0
- truss/{server → templates}/shared/secrets_resolver.py +1 -2
- truss/{server → templates}/shared/serialization.py +31 -9
- truss/{server → templates}/shared/util.py +3 -13
- truss/templates/trtllm-audio/model/model.py +49 -0
- truss/templates/trtllm-audio/packages/sigint_patch.py +14 -0
- truss/templates/trtllm-audio/packages/whisper_trt/__init__.py +215 -0
- truss/templates/trtllm-audio/packages/whisper_trt/assets.py +25 -0
- truss/templates/trtllm-audio/packages/whisper_trt/batching.py +52 -0
- truss/templates/trtllm-audio/packages/whisper_trt/custom_types.py +26 -0
- truss/templates/trtllm-audio/packages/whisper_trt/modeling.py +184 -0
- truss/templates/trtllm-audio/packages/whisper_trt/tokenizer.py +185 -0
- truss/templates/trtllm-audio/packages/whisper_trt/utils.py +245 -0
- truss/templates/trtllm-briton/src/extension.py +64 -0
- truss/tests/conftest.py +302 -94
- truss/tests/contexts/image_builder/test_serving_image_builder.py +74 -31
- truss/tests/contexts/local_loader/test_load_local.py +2 -2
- truss/tests/contexts/local_loader/test_truss_module_finder.py +1 -1
- truss/tests/patch/test_calc_patch.py +439 -127
- truss/tests/patch/test_dir_signature.py +3 -12
- truss/tests/patch/test_hash.py +1 -1
- truss/tests/patch/test_signature.py +1 -1
- truss/tests/patch/test_truss_dir_patch_applier.py +23 -11
- truss/tests/patch/test_types.py +2 -2
- truss/tests/remote/baseten/test_api.py +153 -58
- truss/tests/remote/baseten/test_auth.py +2 -1
- truss/tests/remote/baseten/test_core.py +160 -12
- truss/tests/remote/baseten/test_remote.py +489 -77
- truss/tests/remote/baseten/test_service.py +55 -0
- truss/tests/remote/test_remote_factory.py +16 -18
- truss/tests/remote/test_truss_remote.py +26 -17
- truss/tests/templates/control/control/helpers/test_context_managers.py +11 -0
- truss/tests/templates/control/control/helpers/test_model_container_patch_applier.py +184 -0
- truss/tests/templates/control/control/helpers/test_requirement_name_identifier.py +89 -0
- truss/tests/{server → templates/control}/control/test_server.py +79 -24
- truss/tests/{server → templates/control}/control/test_server_integration.py +24 -16
- truss/tests/templates/core/server/test_dynamic_config_resolver.py +108 -0
- truss/tests/templates/core/server/test_lazy_data_resolver.py +329 -0
- truss/tests/templates/core/server/test_lazy_data_resolver_v2.py +79 -0
- truss/tests/{server → templates}/core/server/test_secrets_resolver.py +1 -1
- truss/tests/{server → templates/server}/common/test_retry.py +3 -3
- truss/tests/templates/server/test_model_wrapper.py +248 -0
- truss/tests/{server → templates/server}/test_schema.py +3 -5
- truss/tests/{server/core/server/common → templates/server}/test_truss_server.py +8 -5
- truss/tests/test_build.py +9 -52
- truss/tests/test_config.py +336 -77
- truss/tests/test_context_builder_image.py +3 -11
- truss/tests/test_control_truss_patching.py +7 -12
- truss/tests/test_custom_server.py +38 -0
- truss/tests/test_data/context_builder_image_test/test.py +3 -0
- truss/tests/test_data/happy.ipynb +56 -0
- truss/tests/test_data/model_load_failure_test/config.yaml +2 -0
- truss/tests/test_data/model_load_failure_test/model/__init__.py +0 -0
- truss/tests/test_data/patch_ping_test_server/__init__.py +0 -0
- truss/{test_data → tests/test_data}/patch_ping_test_server/app.py +3 -9
- truss/{test_data → tests/test_data}/server.Dockerfile +20 -21
- truss/tests/test_data/server_conformance_test_truss/__init__.py +0 -0
- truss/tests/test_data/server_conformance_test_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/model/model.py +1 -3
- truss/tests/test_data/test_async_truss/__init__.py +0 -0
- truss/tests/test_data/test_async_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/__init__.py +0 -0
- truss/tests/test_data/test_basic_truss/config.yaml +16 -0
- truss/tests/test_data/test_basic_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/__init__.py +0 -0
- truss/tests/test_data/test_build_commands/config.yaml +13 -0
- truss/tests/test_data/test_build_commands/model/__init__.py +0 -0
- truss/{test_data/test_streaming_async_generator_truss → tests/test_data/test_build_commands}/model/model.py +2 -3
- truss/tests/test_data/test_build_commands_failure/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/config.yaml +14 -0
- truss/tests/test_data/test_build_commands_failure/model/__init__.py +0 -0
- truss/tests/test_data/test_build_commands_failure/model/model.py +17 -0
- truss/tests/test_data/test_concurrency_truss/__init__.py +0 -0
- truss/tests/test_data/test_concurrency_truss/config.yaml +4 -0
- truss/tests/test_data/test_concurrency_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/config.yaml +20 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/Dockerfile +17 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/README.md +10 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/VERSION +1 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/__init__.py +0 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/app.py +19 -0
- truss/tests/test_data/test_custom_server_truss/test_docker_image/build_upload_new_image.sh +6 -0
- truss/tests/test_data/test_openai/__init__.py +0 -0
- truss/{test_data/test_basic_truss → tests/test_data/test_openai}/config.yaml +1 -2
- truss/tests/test_data/test_openai/model/__init__.py +0 -0
- truss/tests/test_data/test_openai/model/model.py +15 -0
- truss/tests/test_data/test_pyantic_v1/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v1/model/model.py +28 -0
- truss/tests/test_data/test_pyantic_v1/requirements.txt +1 -0
- truss/tests/test_data/test_pyantic_v2/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/config.yaml +13 -0
- truss/tests/test_data/test_pyantic_v2/model/__init__.py +0 -0
- truss/tests/test_data/test_pyantic_v2/model/model.py +30 -0
- truss/tests/test_data/test_pyantic_v2/requirements.txt +1 -0
- truss/tests/test_data/test_requirements_file_truss/__init__.py +0 -0
- truss/tests/test_data/test_requirements_file_truss/config.yaml +13 -0
- truss/tests/test_data/test_requirements_file_truss/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_requirements_file_truss/model/model.py +1 -0
- truss/tests/test_data/test_streaming_async_generator_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_async_generator_truss/model/model.py +7 -0
- truss/tests/test_data/test_streaming_read_timeout/__init__.py +0 -0
- truss/tests/test_data/test_streaming_read_timeout/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss/config.yaml +4 -0
- truss/tests/test_data/test_streaming_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/test_streaming_truss_with_error/model/model.py +3 -11
- truss/tests/test_data/test_streaming_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/config.yaml +43 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/__init__.py +0 -0
- truss/tests/test_data/test_streaming_truss_with_tracing/model/model.py +65 -0
- truss/tests/test_data/test_trt_llm_truss/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/config.yaml +15 -0
- truss/tests/test_data/test_trt_llm_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_trt_llm_truss/model/model.py +15 -0
- truss/tests/test_data/test_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss/config.yaml +4 -0
- truss/tests/test_data/test_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss/model/dummy +0 -0
- truss/tests/test_data/test_truss/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss/packages/test_package/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/__init__.py +0 -0
- truss/tests/test_data/test_truss_server_caching_truss/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/config.yaml +4 -0
- truss/tests/test_data/test_truss_with_error/model/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/model/model.py +8 -0
- truss/tests/test_data/test_truss_with_error/packages/__init__.py +0 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_1.py +5 -0
- truss/tests/test_data/test_truss_with_error/packages/helpers_2.py +2 -0
- truss/tests/test_docker.py +2 -1
- truss/tests/test_model_inference.py +1340 -292
- truss/tests/test_model_schema.py +33 -26
- truss/tests/test_testing_utilities_for_other_tests.py +50 -5
- truss/tests/test_truss_gatherer.py +3 -5
- truss/tests/test_truss_handle.py +62 -59
- truss/tests/test_util.py +2 -1
- truss/tests/test_validation.py +15 -13
- truss/tests/trt_llm/test_trt_llm_config.py +41 -0
- truss/tests/trt_llm/test_validation.py +91 -0
- truss/tests/util/test_config_checks.py +40 -0
- truss/tests/util/test_env_vars.py +14 -0
- truss/tests/util/test_path.py +10 -23
- truss/trt_llm/config_checks.py +43 -0
- truss/trt_llm/validation.py +42 -0
- truss/truss_handle/__init__.py +0 -0
- truss/truss_handle/build.py +122 -0
- truss/{decorators.py → truss_handle/decorators.py} +1 -1
- truss/truss_handle/patch/__init__.py +0 -0
- truss/{patch → truss_handle/patch}/calc_patch.py +146 -92
- truss/{types.py → truss_handle/patch/custom_types.py} +35 -27
- truss/{patch → truss_handle/patch}/dir_signature.py +1 -1
- truss/truss_handle/patch/hash.py +71 -0
- truss/{patch → truss_handle/patch}/local_truss_patch_applier.py +6 -4
- truss/truss_handle/patch/signature.py +22 -0
- truss/truss_handle/patch/truss_dir_patch_applier.py +87 -0
- truss/{readme_generator.py → truss_handle/readme_generator.py} +3 -2
- truss/{truss_gatherer.py → truss_handle/truss_gatherer.py} +3 -2
- truss/{truss_handle.py → truss_handle/truss_handle.py} +174 -78
- truss/util/.truss_ignore +3 -0
- truss/{docker.py → util/docker.py} +6 -2
- truss/util/download.py +6 -15
- truss/util/env_vars.py +41 -0
- truss/util/log_utils.py +52 -0
- truss/util/path.py +20 -20
- truss/util/requirements.py +11 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/METADATA +18 -16
- truss-0.60.0.dist-info/RECORD +324 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/WHEEL +1 -1
- truss-0.60.0.dist-info/entry_points.txt +4 -0
- truss_chains/__init__.py +71 -0
- truss_chains/definitions.py +756 -0
- truss_chains/deployment/__init__.py +0 -0
- truss_chains/deployment/code_gen.py +816 -0
- truss_chains/deployment/deployment_client.py +871 -0
- truss_chains/framework.py +1480 -0
- truss_chains/public_api.py +231 -0
- truss_chains/py.typed +0 -0
- truss_chains/pydantic_numpy.py +131 -0
- truss_chains/reference_code/reference_chainlet.py +34 -0
- truss_chains/reference_code/reference_model.py +10 -0
- truss_chains/remote_chainlet/__init__.py +0 -0
- truss_chains/remote_chainlet/model_skeleton.py +60 -0
- truss_chains/remote_chainlet/stub.py +380 -0
- truss_chains/remote_chainlet/utils.py +332 -0
- truss_chains/streaming.py +378 -0
- truss_chains/utils.py +178 -0
- CODE_OF_CONDUCT.md +0 -131
- CONTRIBUTING.md +0 -48
- README.md +0 -137
- context_builder.Dockerfile +0 -24
- truss/blob/blob_backend.py +0 -10
- truss/blob/blob_backend_registry.py +0 -23
- truss/blob/http_public_blob_backend.py +0 -23
- truss/build/__init__.py +0 -2
- truss/build/build.py +0 -143
- truss/build/configure.py +0 -63
- truss/cli/__init__.py +0 -2
- truss/cli/console.py +0 -5
- truss/cli/create.py +0 -5
- truss/config/trt_llm.py +0 -81
- truss/constants.py +0 -61
- truss/model_inference.py +0 -123
- truss/patch/types.py +0 -30
- truss/pytest.ini +0 -7
- truss/server/common/errors.py +0 -100
- truss/server/common/termination_handler_middleware.py +0 -64
- truss/server/common/truss_server.py +0 -389
- truss/server/control/patch/model_code_patch_applier.py +0 -46
- truss/server/control/patch/requirement_name_identifier.py +0 -17
- truss/server/inference_server.py +0 -29
- truss/server/model_wrapper.py +0 -434
- truss/server/shared/logging.py +0 -81
- truss/templates/trtllm/model/model.py +0 -97
- truss/templates/trtllm/packages/build_engine_utils.py +0 -34
- truss/templates/trtllm/packages/constants.py +0 -11
- truss/templates/trtllm/packages/schema.py +0 -216
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/ensemble/config.pbtxt +0 -246
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/1/model.py +0 -181
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/config.pbtxt +0 -64
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/1/model.py +0 -260
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/config.pbtxt +0 -99
- truss/templates/trtllm/packages/tensorrt_llm_model_repository/tensorrt_llm/config.pbtxt +0 -208
- truss/templates/trtllm/packages/triton_client.py +0 -150
- truss/templates/trtllm/packages/utils.py +0 -43
- truss/test_data/context_builder_image_test/test.py +0 -4
- truss/test_data/happy.ipynb +0 -54
- truss/test_data/model_load_failure_test/config.yaml +0 -2
- truss/test_data/test_concurrency_truss/config.yaml +0 -2
- truss/test_data/test_streaming_async_generator_truss/config.yaml +0 -2
- truss/test_data/test_streaming_truss/config.yaml +0 -3
- truss/test_data/test_truss/config.yaml +0 -2
- truss/tests/server/common/test_termination_handler_middleware.py +0 -93
- truss/tests/server/control/test_model_container_patch_applier.py +0 -203
- truss/tests/server/core/server/common/test_util.py +0 -19
- truss/tests/server/test_model_wrapper.py +0 -87
- truss/util/data_structures.py +0 -16
- truss-0.10.0rc1.dist-info/RECORD +0 -216
- truss-0.10.0rc1.dist-info/entry_points.txt +0 -3
- truss/{server/shared → base}/__init__.py +0 -0
- truss/{server → templates/control}/control/helpers/context_managers.py +0 -0
- truss/{server/control → templates/control/control/helpers}/errors.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/__init__.py +0 -0
- truss/{server/control/patch → templates/control/control/helpers/truss_patch}/system_packages.py +0 -0
- truss/{test_data/annotated_types_truss/model → templates/server}/__init__.py +0 -0
- truss/{server → templates/server}/common/__init__.py +0 -0
- truss/{test_data/gcs_fix/model → templates/shared}/__init__.py +0 -0
- truss/templates/{trtllm → trtllm-briton}/README.md +0 -0
- truss/{test_data/server_conformance_test_truss/model → tests/test_data}/__init__.py +0 -0
- truss/{test_data/test_basic_truss/model → tests/test_data/annotated_types_truss}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/config.yaml +0 -0
- truss/{test_data/test_requirements_file_truss → tests/test_data/annotated_types_truss}/model/__init__.py +0 -0
- truss/{test_data → tests/test_data}/annotated_types_truss/model/model.py +0 -0
- truss/{test_data → tests/test_data}/auto-mpg.data +0 -0
- truss/{test_data → tests/test_data}/context_builder_image_test/Dockerfile +0 -0
- truss/{test_data/test_truss/model → tests/test_data/context_builder_image_test}/__init__.py +0 -0
- truss/{test_data/test_truss_server_caching_truss/model → tests/test_data/gcs_fix}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/config.yaml +0 -0
- truss/tests/{local → test_data/gcs_fix/model}/__init__.py +0 -0
- truss/{test_data → tests/test_data}/gcs_fix/model/model.py +0 -0
- truss/{test_data/test_truss/model/dummy → tests/test_data/model_load_failure_test/__init__.py} +0 -0
- truss/{test_data → tests/test_data}/model_load_failure_test/model/model.py +0 -0
- truss/{test_data → tests/test_data}/pima-indians-diabetes.csv +0 -0
- truss/{test_data → tests/test_data}/readme_int_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_no_example.md +0 -0
- truss/{test_data → tests/test_data}/readme_str_example.md +0 -0
- truss/{test_data → tests/test_data}/server_conformance_test_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/config.yaml +0 -0
- truss/{test_data → tests/test_data}/test_async_truss/model/model.py +3 -3
- /truss/{test_data → tests/test_data}/test_basic_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_concurrency_truss/model/model.py +0 -0
- /truss/{test_data/test_requirements_file_truss → tests/test_data/test_pyantic_v1}/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_requirements_file_truss/requirements.txt +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_read_timeout/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_streaming_truss_with_error/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/examples.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss/model/model.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss/packages/test_package/test.py +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/config.yaml +0 -0
- /truss/{test_data → tests/test_data}/test_truss_server_caching_truss/model/model.py +0 -0
- /truss/{patch → truss_handle/patch}/constants.py +0 -0
- /truss/{notebook.py → util/notebook.py} +0 -0
- {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import asyncio
|
|
3
|
+
import contextlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from typing import (
|
|
9
|
+
Any,
|
|
10
|
+
AsyncIterator,
|
|
11
|
+
ClassVar,
|
|
12
|
+
Dict,
|
|
13
|
+
Iterator,
|
|
14
|
+
Mapping,
|
|
15
|
+
Optional,
|
|
16
|
+
Type,
|
|
17
|
+
TypeVar,
|
|
18
|
+
Union,
|
|
19
|
+
final,
|
|
20
|
+
overload,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
import aiohttp
|
|
24
|
+
import httpx
|
|
25
|
+
import pydantic
|
|
26
|
+
import tenacity
|
|
27
|
+
from truss.templates.shared import serialization
|
|
28
|
+
|
|
29
|
+
from truss_chains import definitions
|
|
30
|
+
from truss_chains.remote_chainlet import utils
|
|
31
|
+
|
|
32
|
+
# Connection-pool sizes used when building the HTTP clients in this module.
DEFAULT_MAX_CONNECTIONS: int = 1000
DEFAULT_MAX_KEEPALIVE_CONNECTIONS: int = 400


# Constrained to the two tenacity retry drivers (async and sync).
_RetryPolicyT = TypeVar("_RetryPolicyT", tenacity.AsyncRetrying, tenacity.Retrying)
# Request payload: either a pydantic model or plain JSON-like data.
InputT = TypeVar("InputT", pydantic.BaseModel, Any)  # Any signifies "JSON".
# Response type when the caller asks for a parsed pydantic model.
OutputModelT = TypeVar("OutputModelT", bound=pydantic.BaseModel)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BasetenSession:
    """Provides configured HTTP clients, retries, rate-limit warnings etc.

    One sync (``httpx``) and one async (``aiohttp``) client are created lazily
    and cached together with their creation time. A cached client is replaced
    once it is older than ``_client_cycle_time_sec``.
    """

    _client_cycle_time_sec: ClassVar[int] = 60 * 60  # 1 hour.
    _client_limits: ClassVar[httpx.Limits] = httpx.Limits(
        max_connections=DEFAULT_MAX_CONNECTIONS,
        max_keepalive_connections=DEFAULT_MAX_KEEPALIVE_CONNECTIONS,
    )
    _auth_header: Mapping[str, str]
    _service_descriptor: definitions.DeployedServiceDescriptor
    # Cache entries are ``(client, creation time as int epoch seconds)``.
    _cached_sync_client: Optional[tuple[httpx.Client, int]]
    _cached_async_client: Optional[tuple[aiohttp.ClientSession, int]]

    def __init__(
        self, service_descriptor: definitions.DeployedServiceDescriptor, api_key: str
    ) -> None:
        """Store target/config and prepare (still empty) client caches.

        Args:
            service_descriptor: Name, predict URL and RPC options of the target.
            api_key: Key sent as ``Api-Key`` in the ``Authorization`` header.
        """
        logging.info(
            f"Creating BasetenSession (HTTP) for `{service_descriptor.name}`.\n"
            f"\tTarget: `{service_descriptor.predict_url}`\n"
            f"\t`{service_descriptor.options}`."
        )
        self._auth_header = {"Authorization": f"Api-Key {api_key}"}
        self._service_descriptor = service_descriptor
        # Clients are created on first use, see `_client_sync`/`_client_async`.
        self._cached_sync_client = None
        self._cached_async_client = None
        self._sync_lock = threading.Lock()
        self._async_lock = asyncio.Lock()
        self._sync_num_requests = utils.ThreadSafeCounter()
        self._async_num_requests = utils.AsyncSafeCounter()

    @property
    def name(self) -> str:
        """Name taken from the service descriptor."""
        return self._service_descriptor.name

    def _maybe_warn_for_overload(self, num_requests: int) -> None:
        """Log a warning if `num_requests` exceeds 80% of the connection limit."""
        limit = self._client_limits.max_connections
        if limit is None or num_requests <= limit * 0.8:
            return
        logging.warning(
            f"High number of concurrently outgoing HTTP connections: "
            f"`{num_requests}`. Close to or above connection limit of "
            f"`{limit}`. To avoid overload and "
            f"timeouts, use more replicas/autoscaling for this chainlet."
        )

    def _client_cycle_needed(self, cached_client: Optional[tuple[Any, int]]) -> bool:
        """Return True if there is no cached client or it exceeded the cycle time."""
        if not cached_client:
            return True
        _, created_at = cached_client
        age_sec = int(time.time()) - created_at
        return age_sec > self._client_cycle_time_sec

    def _log_retry(self, retry_state: tenacity.RetryCallState) -> None:
        """Tenacity ``before_sleep`` hook: log the attempt number."""
        logging.info(f"Retrying `{self.name}`, attempt {retry_state.attempt_number}")

    def _make_retry_policy(self, retrying: Type[_RetryPolicyT]) -> _RetryPolicyT:
        """Instantiate `retrying` with this session's retry settings.

        Any ``Exception`` triggers a retry; after the configured number of
        attempts the last exception is re-raised.
        """
        max_attempts = self._service_descriptor.options.retries
        return retrying(
            stop=tenacity.stop_after_attempt(max_attempts),
            retry=tenacity.retry_if_exception_type(Exception),
            reraise=True,
            before_sleep=self._log_retry,
        )

    @contextlib.contextmanager
    def _client_sync(self) -> Iterator[httpx.Client]:
        """Yield the cached sync client, (re-)creating it when needed."""
        # The cycle check is done once without and once with the lock held, so
        # that the lock is only taken when a new client might be required.
        if self._client_cycle_needed(self._cached_sync_client):
            with self._sync_lock:
                if self._client_cycle_needed(self._cached_sync_client):
                    fresh_client = httpx.Client(
                        headers=self._auth_header,
                        timeout=self._service_descriptor.options.timeout_sec,
                        limits=self._client_limits,
                    )
                    self._cached_sync_client = (fresh_client, int(time.time()))

        cached = self._cached_sync_client
        assert cached is not None
        client, _ = cached

        with self._sync_num_requests as num_requests:
            self._maybe_warn_for_overload(num_requests)
            yield client

    @contextlib.asynccontextmanager
    async def _client_async(self) -> AsyncIterator[aiohttp.ClientSession]:
        """Yield the cached async client, (re-)creating it when needed."""
        # The cycle check is done once without and once with the lock held, so
        # that the lock is only taken when a new client might be required.
        if self._client_cycle_needed(self._cached_async_client):
            async with self._async_lock:
                if self._client_cycle_needed(self._cached_async_client):
                    connector = aiohttp.TCPConnector(limit=DEFAULT_MAX_CONNECTIONS)
                    request_timeout = aiohttp.ClientTimeout(
                        total=self._service_descriptor.options.timeout_sec
                    )
                    fresh_session = aiohttp.ClientSession(
                        headers=self._auth_header,
                        connector=connector,
                        timeout=request_timeout,
                    )
                    self._cached_async_client = (fresh_session, int(time.time()))

        cached = self._cached_async_client
        assert cached is not None
        client, _ = cached

        async with self._async_num_requests as num_requests:
            self._maybe_warn_for_overload(num_requests)
            yield client
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class StubBase(BasetenSession, abc.ABC):
|
|
152
|
+
"""Base class for stubs that invoke remote chainlets.
|
|
153
|
+
|
|
154
|
+
Extends ``BasetenSession`` with methods for data serialization, de-serialization
|
|
155
|
+
and invoking other endpoints.
|
|
156
|
+
|
|
157
|
+
It is used internally for RPCs to dependency chainlets, but it can also be used
|
|
158
|
+
in user-code for wrapping a deployed truss model into the Chains framework. It
|
|
159
|
+
flexibly supports JSON and pydantic inputs and output. Example usage::
|
|
160
|
+
|
|
161
|
+
import pydantic
|
|
162
|
+
import truss_chains as chains
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class WhisperOutput(pydantic.BaseModel):
|
|
166
|
+
...
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class DeployedWhisper(chains.StubBase):
|
|
170
|
+
# Input JSON, output JSON.
|
|
171
|
+
async def run_remote(self, audio_b64: str) -> Any:
|
|
172
|
+
return await self.predict_async(
|
|
173
|
+
inputs={"audio": audio_b64})
|
|
174
|
+
# resp == {"text": ..., "language": ...}
|
|
175
|
+
|
|
176
|
+
# OR Input JSON, output pydantic model.
|
|
177
|
+
async def run_remote(self, audio_b64: str) -> WhisperOutput:
|
|
178
|
+
return await self.predict_async(
|
|
179
|
+
inputs={"audio": audio_b64}, output_model=WhisperOutput)
|
|
180
|
+
|
|
181
|
+
# OR Input and output are pydantic models.
|
|
182
|
+
async def run_remote(self, data: WhisperInput) -> WhisperOutput:
|
|
183
|
+
return await self.predict_async(data, output_model=WhisperOutput)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class MyChainlet(chains.ChainletBase):
|
|
187
|
+
|
|
188
|
+
def __init__(self, ..., context=chains.depends_context()):
|
|
189
|
+
...
|
|
190
|
+
self._whisper = DeployedWhisper.from_url(
|
|
191
|
+
WHISPER_URL,
|
|
192
|
+
context,
|
|
193
|
+
options=chains.RPCOptions(retries=3),
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
async def run_remote(self, ...):
|
|
197
|
+
await self._whisper.run_remote(...)
|
|
198
|
+
"""
|
|
199
|
+
|
|
200
|
+
@final
def __init__(
    self, service_descriptor: definitions.DeployedServiceDescriptor, api_key: str
) -> None:
    """Initialize the stub by delegating to the parent initializer.

    Args:
        service_descriptor: Contains the URL and other configuration.
        api_key: A baseten API key to authorize requests.
    """
    super().__init__(service_descriptor=service_descriptor, api_key=api_key)
|
|
210
|
+
|
|
211
|
+
@classmethod
def from_url(
    cls,
    predict_url: str,
    context: definitions.DeploymentContext,
    options: Optional[definitions.RPCOptions] = None,
):
    """Factory method, convenient to be used in chainlet's ``__init__``-method.

    The class name is used as both ``name`` and ``display_name`` of the
    service descriptor; the API key is taken from ``context``.

    Args:
        predict_url: URL to predict endpoint of another chain / truss model.
        context: Deployment context object, obtained in the chainlet's ``__init__``.
        options: RPC options, e.g. retries. Defaults to ``RPCOptions()``.
    """
    if not options:
        options = definitions.RPCOptions()
    stub_name = cls.__name__
    descriptor = definitions.DeployedServiceDescriptor(
        name=stub_name,
        display_name=stub_name,
        predict_url=predict_url,
        options=options,
    )
    return cls(service_descriptor=descriptor, api_key=context.get_baseten_api_key())
|
|
235
|
+
|
|
236
|
+
def _make_request_params(
    self, inputs: InputT, for_httpx: bool = False
) -> Mapping[str, Any]:
    """Assemble the keyword arguments passed to the HTTP client's ``post``.

    Args:
        inputs: Either a pydantic model or a JSON-compatible object.
        for_httpx: Selects the keyword under which a pre-encoded body is
            passed: ``"content"`` when true, ``"data"`` otherwise.

    Returns:
        A mapping that always contains ``"headers"`` and exactly one body
        entry. The body entry is the pre-encoded payload under
        ``"content"``/``"data"``, or the raw object under ``"json"`` for
        non-model inputs sent as JSON.
    """
    request_headers = {
        definitions.OTEL_TRACE_PARENT_HEADER_KEY: utils.get_trace_parent()
    }
    is_model = isinstance(inputs, pydantic.BaseModel)
    binary = self._service_descriptor.options.use_binary
    body_key = "content" if for_httpx else "data"
    request: Dict[str, Any] = {}

    if binary:
        # Models are dumped to plain python objects before msgpack encoding.
        payload = inputs.model_dump(mode="python") if is_model else inputs
        request[body_key] = serialization.truss_msgpack_serialize(payload)
        request_headers["Content-Type"] = "application/octet-stream"
    elif is_model:
        request[body_key] = inputs.model_dump_json()
        request_headers["Content-Type"] = "application/json"
    else:
        # inputs is JSON dict; the client's ``json`` keyword encodes it.
        request["json"] = inputs
        request_headers["Content-Type"] = "application/json"

    request["headers"] = request_headers
    return request
|
|
262
|
+
|
|
263
|
+
def _response_to_pydantic(
    self, response: bytes, output_model: Type[OutputModelT]
) -> OutputModelT:
    """Parse a raw response body into an instance of ``output_model``.

    Args:
        response: Raw response bytes.
        output_model: Pydantic model class to validate the payload against.

    Returns:
        The validated ``output_model`` instance. The body is read as JSON
        unless the ``use_binary`` option is set, in which case it is first
        decoded with the truss msgpack deserializer.
    """
    if not self._service_descriptor.options.use_binary:
        return output_model.model_validate_json(response)

    decoded = serialization.truss_msgpack_deserialize(response)
    return output_model.model_validate(decoded)
|
|
270
|
+
|
|
271
|
+
def _response_to_json(self, response: bytes) -> Any:
|
|
272
|
+
if self._service_descriptor.options.use_binary:
|
|
273
|
+
return serialization.truss_msgpack_deserialize(response)
|
|
274
|
+
return json.loads(response)
|
|
275
|
+
|
|
276
|
+
@overload
def predict_sync(
    self, inputs: InputT, output_model: Type[OutputModelT]
) -> OutputModelT: ...


@overload  # Returns JSON
def predict_sync(self, inputs: InputT, output_model: None = None) -> Any: ...


def predict_sync(
    self, inputs: InputT, output_model: Optional[Type[OutputModelT]] = None
) -> Union[OutputModelT, Any]:
    """Post ``inputs`` to the predict URL synchronously and parse the reply.

    The request runs under the retry policy built by
    ``_make_retry_policy``.

    Args:
        inputs: Request payload; see ``_make_request_params``.
        output_model: Optional pydantic model class for the response. When
            omitted, the decoded JSON-like object is returned instead.

    Returns:
        An ``output_model`` instance, or the decoded response object.

    Raises:
        TimeoutError: If the call ends in ``httpx.ReadTimeout``.
    """
    retrying = self._make_retry_policy(tenacity.Retrying)
    request_kwargs = self._make_request_params(inputs, for_httpx=True)

    def _post_once() -> bytes:
        client: httpx.Client
        with self._client_sync() as client:
            reply = client.post(
                self._service_descriptor.predict_url, **request_kwargs
            )
        utils.response_raise_errors(reply, self.name)
        return reply.content

    try:
        raw = retrying(_post_once)
    except httpx.ReadTimeout:
        timeout_msg = (
            f"Timeout calling remote Chainlet `{self.name}` "
            f"({self._service_descriptor.options.timeout_sec} seconds limit)."
        )
        logging.warning(timeout_msg)
        raise TimeoutError(timeout_msg) from None  # Prune error stack trace (TMI).

    if not output_model:
        return self._response_to_json(raw)
    return self._response_to_pydantic(raw, output_model)
|
|
310
|
+
|
|
311
|
+
@overload
async def predict_async(
    self, inputs: InputT, output_model: Type[OutputModelT]
) -> OutputModelT: ...


@overload  # Returns JSON.
async def predict_async(self, inputs: InputT, output_model: None = None) -> Any: ...


async def predict_async(
    self, inputs: InputT, output_model: Optional[Type[OutputModelT]] = None
) -> Union[OutputModelT, Any]:
    """Post ``inputs`` to the predict URL asynchronously and parse the reply.

    The request runs under the retry policy built by
    ``_make_retry_policy``.

    Args:
        inputs: Request payload; see ``_make_request_params``.
        output_model: Optional pydantic model class for the response. When
            omitted, the decoded JSON-like object is returned instead.

    Returns:
        An ``output_model`` instance, or the decoded response object.

    Raises:
        TimeoutError: If the call ends in ``asyncio.TimeoutError``.
    """
    retrying = self._make_retry_policy(tenacity.AsyncRetrying)
    request_kwargs = self._make_request_params(inputs)

    async def _post_once() -> bytes:
        client: aiohttp.ClientSession
        async with self._client_async() as client:
            async with client.post(
                self._service_descriptor.predict_url, **request_kwargs
            ) as reply:
                await utils.async_response_raise_errors(reply, self.name)
                return await reply.read()

    try:
        raw: bytes = await retrying(_post_once)
    except asyncio.TimeoutError:
        timeout_msg = (
            f"Timeout calling remote Chainlet `{self.name}` "
            f"({self._service_descriptor.options.timeout_sec} seconds limit)."
        )
        logging.warning(timeout_msg)
        raise TimeoutError(timeout_msg) from None  # Prune error stack trace (TMI).

    if not output_model:
        return self._response_to_json(raw)
    return self._response_to_pydantic(raw, output_model)
|
|
347
|
+
|
|
348
|
+
async def predict_async_stream(self, inputs: InputT) -> AsyncIterator[bytes]:
    """Post ``inputs`` asynchronously and return an iterator over the body.

    The request runs under the retry policy built by
    ``_make_retry_policy``.

    Args:
        inputs: Request payload; see ``_make_request_params``.

    Returns:
        The ``iter_any()`` iterator of the response content, yielding raw
        byte chunks.

    Raises:
        TimeoutError: If the call ends in ``asyncio.TimeoutError``.
    """
    retrying = self._make_retry_policy(tenacity.AsyncRetrying)
    request_kwargs = self._make_request_params(inputs)

    async def _open_stream() -> AsyncIterator[bytes]:
        client: aiohttp.ClientSession
        # NOTE(review): the returned iterator is consumed after the client
        # context has exited; presumably `_client_async` keeps the session
        # open afterwards - confirm.
        async with self._client_async() as client:
            reply = await client.post(
                self._service_descriptor.predict_url, **request_kwargs
            )
        await utils.async_response_raise_errors(reply, self.name)
        return reply.content.iter_any()

    try:
        return await retrying(_open_stream)
    except asyncio.TimeoutError:
        timeout_msg = (
            f"Timeout calling remote Chainlet `{self.name}` "
            f"({self._service_descriptor.options.timeout_sec} seconds limit)."
        )
        logging.warning(timeout_msg)
        raise TimeoutError(timeout_msg) from None  # Prune error stack trace (TMI).
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
# Type variable bound to ``StubBase``; lets ``factory`` be typed as returning
# an instance of exactly the stub class it is given.
StubT = TypeVar("StubT", bound=StubBase)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def factory(stub_cls: Type[StubT], context: definitions.DeploymentContext) -> StubT:
    """Instantiate ``stub_cls`` from the services registered in ``context``.

    Args:
        stub_cls: Stub class to instantiate; its ``__name__`` is the key used
            to look up the service descriptor.
        context: Deployment context supplying the service descriptor and the
            baseten API key.

    Returns:
        A new ``stub_cls`` instance.
    """
    # Assumes the stub_cls-name and the name of the service in ``context`` match.
    descriptor = context.get_service_descriptor(stub_cls.__name__)
    api_key = context.get_baseten_api_key()
    return stub_cls(service_descriptor=descriptor, api_key=api_key)
|