truss 0.10.0rc1__py3-none-any.whl → 0.60.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truss might be problematic. Click here for more details.

Files changed (362)
  1. truss/__init__.py +10 -3
  2. truss/api/__init__.py +123 -0
  3. truss/api/definitions.py +51 -0
  4. truss/base/constants.py +116 -0
  5. truss/base/custom_types.py +29 -0
  6. truss/{errors.py → base/errors.py} +4 -0
  7. truss/base/trt_llm_config.py +310 -0
  8. truss/{truss_config.py → base/truss_config.py} +344 -31
  9. truss/{truss_spec.py → base/truss_spec.py} +20 -6
  10. truss/{validation.py → base/validation.py} +60 -11
  11. truss/cli/cli.py +841 -88
  12. truss/{remote → cli}/remote_cli.py +2 -7
  13. truss/contexts/docker_build_setup.py +67 -0
  14. truss/contexts/image_builder/cache_warmer.py +2 -8
  15. truss/contexts/image_builder/image_builder.py +1 -1
  16. truss/contexts/image_builder/serving_image_builder.py +292 -46
  17. truss/contexts/image_builder/util.py +1 -3
  18. truss/contexts/local_loader/docker_build_emulator.py +58 -0
  19. truss/contexts/local_loader/load_model_local.py +2 -2
  20. truss/contexts/local_loader/truss_module_loader.py +1 -1
  21. truss/contexts/local_loader/utils.py +1 -1
  22. truss/local/local_config.py +2 -6
  23. truss/local/local_config_handler.py +20 -5
  24. truss/patch/__init__.py +1 -0
  25. truss/patch/hash.py +4 -70
  26. truss/patch/signature.py +4 -16
  27. truss/patch/truss_dir_patch_applier.py +3 -78
  28. truss/remote/baseten/api.py +308 -23
  29. truss/remote/baseten/auth.py +3 -3
  30. truss/remote/baseten/core.py +257 -50
  31. truss/remote/baseten/custom_types.py +44 -0
  32. truss/remote/baseten/error.py +4 -0
  33. truss/remote/baseten/remote.py +369 -118
  34. truss/remote/baseten/service.py +118 -11
  35. truss/remote/baseten/utils/status.py +29 -0
  36. truss/remote/baseten/utils/tar.py +34 -22
  37. truss/remote/baseten/utils/transfer.py +36 -23
  38. truss/remote/remote_factory.py +14 -5
  39. truss/remote/truss_remote.py +72 -45
  40. truss/templates/base.Dockerfile.jinja +18 -16
  41. truss/templates/cache.Dockerfile.jinja +3 -3
  42. truss/{server → templates/control}/control/application.py +14 -35
  43. truss/{server → templates/control}/control/endpoints.py +39 -9
  44. truss/{server/control/patch/types.py → templates/control/control/helpers/custom_types.py} +13 -52
  45. truss/{server → templates/control}/control/helpers/inference_server_controller.py +4 -8
  46. truss/{server → templates/control}/control/helpers/inference_server_process_controller.py +2 -4
  47. truss/{server → templates/control}/control/helpers/inference_server_starter.py +5 -10
  48. truss/{server/control → templates/control/control/helpers}/truss_patch/model_code_patch_applier.py +8 -6
  49. truss/{server/control/patch → templates/control/control/helpers/truss_patch}/model_container_patch_applier.py +18 -26
  50. truss/templates/control/control/helpers/truss_patch/requirement_name_identifier.py +66 -0
  51. truss/{server → templates/control}/control/server.py +11 -6
  52. truss/templates/control/requirements.txt +9 -0
  53. truss/templates/custom_python_dx/my_model.py +28 -0
  54. truss/templates/docker_server/proxy.conf.jinja +42 -0
  55. truss/templates/docker_server/supervisord.conf.jinja +27 -0
  56. truss/templates/docker_server_requirements.txt +1 -0
  57. truss/templates/server/common/errors.py +231 -0
  58. truss/{server → templates/server}/common/patches/whisper/patch.py +1 -0
  59. truss/{server/common/patches/__init__.py → templates/server/common/patches.py} +1 -3
  60. truss/{server → templates/server}/common/retry.py +1 -0
  61. truss/{server → templates/server}/common/schema.py +11 -9
  62. truss/templates/server/common/tracing.py +157 -0
  63. truss/templates/server/main.py +9 -0
  64. truss/templates/server/model_wrapper.py +961 -0
  65. truss/templates/server/requirements.txt +21 -0
  66. truss/templates/server/truss_server.py +447 -0
  67. truss/templates/server.Dockerfile.jinja +62 -14
  68. truss/templates/shared/dynamic_config_resolver.py +28 -0
  69. truss/templates/shared/lazy_data_resolver.py +164 -0
  70. truss/templates/shared/log_config.py +125 -0
  71. truss/{server → templates}/shared/secrets_resolver.py +1 -2
  72. truss/{server → templates}/shared/serialization.py +31 -9
  73. truss/{server → templates}/shared/util.py +3 -13
  74. truss/templates/trtllm-audio/model/model.py +49 -0
  75. truss/templates/trtllm-audio/packages/sigint_patch.py +14 -0
  76. truss/templates/trtllm-audio/packages/whisper_trt/__init__.py +215 -0
  77. truss/templates/trtllm-audio/packages/whisper_trt/assets.py +25 -0
  78. truss/templates/trtllm-audio/packages/whisper_trt/batching.py +52 -0
  79. truss/templates/trtllm-audio/packages/whisper_trt/custom_types.py +26 -0
  80. truss/templates/trtllm-audio/packages/whisper_trt/modeling.py +184 -0
  81. truss/templates/trtllm-audio/packages/whisper_trt/tokenizer.py +185 -0
  82. truss/templates/trtllm-audio/packages/whisper_trt/utils.py +245 -0
  83. truss/templates/trtllm-briton/src/extension.py +64 -0
  84. truss/tests/conftest.py +302 -94
  85. truss/tests/contexts/image_builder/test_serving_image_builder.py +74 -31
  86. truss/tests/contexts/local_loader/test_load_local.py +2 -2
  87. truss/tests/contexts/local_loader/test_truss_module_finder.py +1 -1
  88. truss/tests/patch/test_calc_patch.py +439 -127
  89. truss/tests/patch/test_dir_signature.py +3 -12
  90. truss/tests/patch/test_hash.py +1 -1
  91. truss/tests/patch/test_signature.py +1 -1
  92. truss/tests/patch/test_truss_dir_patch_applier.py +23 -11
  93. truss/tests/patch/test_types.py +2 -2
  94. truss/tests/remote/baseten/test_api.py +153 -58
  95. truss/tests/remote/baseten/test_auth.py +2 -1
  96. truss/tests/remote/baseten/test_core.py +160 -12
  97. truss/tests/remote/baseten/test_remote.py +489 -77
  98. truss/tests/remote/baseten/test_service.py +55 -0
  99. truss/tests/remote/test_remote_factory.py +16 -18
  100. truss/tests/remote/test_truss_remote.py +26 -17
  101. truss/tests/templates/control/control/helpers/test_context_managers.py +11 -0
  102. truss/tests/templates/control/control/helpers/test_model_container_patch_applier.py +184 -0
  103. truss/tests/templates/control/control/helpers/test_requirement_name_identifier.py +89 -0
  104. truss/tests/{server → templates/control}/control/test_server.py +79 -24
  105. truss/tests/{server → templates/control}/control/test_server_integration.py +24 -16
  106. truss/tests/templates/core/server/test_dynamic_config_resolver.py +108 -0
  107. truss/tests/templates/core/server/test_lazy_data_resolver.py +329 -0
  108. truss/tests/templates/core/server/test_lazy_data_resolver_v2.py +79 -0
  109. truss/tests/{server → templates}/core/server/test_secrets_resolver.py +1 -1
  110. truss/tests/{server → templates/server}/common/test_retry.py +3 -3
  111. truss/tests/templates/server/test_model_wrapper.py +248 -0
  112. truss/tests/{server → templates/server}/test_schema.py +3 -5
  113. truss/tests/{server/core/server/common → templates/server}/test_truss_server.py +8 -5
  114. truss/tests/test_build.py +9 -52
  115. truss/tests/test_config.py +336 -77
  116. truss/tests/test_context_builder_image.py +3 -11
  117. truss/tests/test_control_truss_patching.py +7 -12
  118. truss/tests/test_custom_server.py +38 -0
  119. truss/tests/test_data/context_builder_image_test/test.py +3 -0
  120. truss/tests/test_data/happy.ipynb +56 -0
  121. truss/tests/test_data/model_load_failure_test/config.yaml +2 -0
  122. truss/tests/test_data/model_load_failure_test/model/__init__.py +0 -0
  123. truss/tests/test_data/patch_ping_test_server/__init__.py +0 -0
  124. truss/{test_data → tests/test_data}/patch_ping_test_server/app.py +3 -9
  125. truss/{test_data → tests/test_data}/server.Dockerfile +20 -21
  126. truss/tests/test_data/server_conformance_test_truss/__init__.py +0 -0
  127. truss/tests/test_data/server_conformance_test_truss/model/__init__.py +0 -0
  128. truss/{test_data → tests/test_data}/server_conformance_test_truss/model/model.py +1 -3
  129. truss/tests/test_data/test_async_truss/__init__.py +0 -0
  130. truss/tests/test_data/test_async_truss/model/__init__.py +0 -0
  131. truss/tests/test_data/test_basic_truss/__init__.py +0 -0
  132. truss/tests/test_data/test_basic_truss/config.yaml +16 -0
  133. truss/tests/test_data/test_basic_truss/model/__init__.py +0 -0
  134. truss/tests/test_data/test_build_commands/__init__.py +0 -0
  135. truss/tests/test_data/test_build_commands/config.yaml +13 -0
  136. truss/tests/test_data/test_build_commands/model/__init__.py +0 -0
  137. truss/{test_data/test_streaming_async_generator_truss → tests/test_data/test_build_commands}/model/model.py +2 -3
  138. truss/tests/test_data/test_build_commands_failure/__init__.py +0 -0
  139. truss/tests/test_data/test_build_commands_failure/config.yaml +14 -0
  140. truss/tests/test_data/test_build_commands_failure/model/__init__.py +0 -0
  141. truss/tests/test_data/test_build_commands_failure/model/model.py +17 -0
  142. truss/tests/test_data/test_concurrency_truss/__init__.py +0 -0
  143. truss/tests/test_data/test_concurrency_truss/config.yaml +4 -0
  144. truss/tests/test_data/test_concurrency_truss/model/__init__.py +0 -0
  145. truss/tests/test_data/test_custom_server_truss/__init__.py +0 -0
  146. truss/tests/test_data/test_custom_server_truss/config.yaml +20 -0
  147. truss/tests/test_data/test_custom_server_truss/test_docker_image/Dockerfile +17 -0
  148. truss/tests/test_data/test_custom_server_truss/test_docker_image/README.md +10 -0
  149. truss/tests/test_data/test_custom_server_truss/test_docker_image/VERSION +1 -0
  150. truss/tests/test_data/test_custom_server_truss/test_docker_image/__init__.py +0 -0
  151. truss/tests/test_data/test_custom_server_truss/test_docker_image/app.py +19 -0
  152. truss/tests/test_data/test_custom_server_truss/test_docker_image/build_upload_new_image.sh +6 -0
  153. truss/tests/test_data/test_openai/__init__.py +0 -0
  154. truss/{test_data/test_basic_truss → tests/test_data/test_openai}/config.yaml +1 -2
  155. truss/tests/test_data/test_openai/model/__init__.py +0 -0
  156. truss/tests/test_data/test_openai/model/model.py +15 -0
  157. truss/tests/test_data/test_pyantic_v1/__init__.py +0 -0
  158. truss/tests/test_data/test_pyantic_v1/model/__init__.py +0 -0
  159. truss/tests/test_data/test_pyantic_v1/model/model.py +28 -0
  160. truss/tests/test_data/test_pyantic_v1/requirements.txt +1 -0
  161. truss/tests/test_data/test_pyantic_v2/__init__.py +0 -0
  162. truss/tests/test_data/test_pyantic_v2/config.yaml +13 -0
  163. truss/tests/test_data/test_pyantic_v2/model/__init__.py +0 -0
  164. truss/tests/test_data/test_pyantic_v2/model/model.py +30 -0
  165. truss/tests/test_data/test_pyantic_v2/requirements.txt +1 -0
  166. truss/tests/test_data/test_requirements_file_truss/__init__.py +0 -0
  167. truss/tests/test_data/test_requirements_file_truss/config.yaml +13 -0
  168. truss/tests/test_data/test_requirements_file_truss/model/__init__.py +0 -0
  169. truss/{test_data → tests/test_data}/test_requirements_file_truss/model/model.py +1 -0
  170. truss/tests/test_data/test_streaming_async_generator_truss/__init__.py +0 -0
  171. truss/tests/test_data/test_streaming_async_generator_truss/config.yaml +4 -0
  172. truss/tests/test_data/test_streaming_async_generator_truss/model/__init__.py +0 -0
  173. truss/tests/test_data/test_streaming_async_generator_truss/model/model.py +7 -0
  174. truss/tests/test_data/test_streaming_read_timeout/__init__.py +0 -0
  175. truss/tests/test_data/test_streaming_read_timeout/model/__init__.py +0 -0
  176. truss/tests/test_data/test_streaming_truss/__init__.py +0 -0
  177. truss/tests/test_data/test_streaming_truss/config.yaml +4 -0
  178. truss/tests/test_data/test_streaming_truss/model/__init__.py +0 -0
  179. truss/tests/test_data/test_streaming_truss_with_error/__init__.py +0 -0
  180. truss/tests/test_data/test_streaming_truss_with_error/model/__init__.py +0 -0
  181. truss/{test_data → tests/test_data}/test_streaming_truss_with_error/model/model.py +3 -11
  182. truss/tests/test_data/test_streaming_truss_with_error/packages/__init__.py +0 -0
  183. truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_1.py +5 -0
  184. truss/tests/test_data/test_streaming_truss_with_error/packages/helpers_2.py +2 -0
  185. truss/tests/test_data/test_streaming_truss_with_tracing/__init__.py +0 -0
  186. truss/tests/test_data/test_streaming_truss_with_tracing/config.yaml +43 -0
  187. truss/tests/test_data/test_streaming_truss_with_tracing/model/__init__.py +0 -0
  188. truss/tests/test_data/test_streaming_truss_with_tracing/model/model.py +65 -0
  189. truss/tests/test_data/test_trt_llm_truss/__init__.py +0 -0
  190. truss/tests/test_data/test_trt_llm_truss/config.yaml +15 -0
  191. truss/tests/test_data/test_trt_llm_truss/model/__init__.py +0 -0
  192. truss/tests/test_data/test_trt_llm_truss/model/model.py +15 -0
  193. truss/tests/test_data/test_truss/__init__.py +0 -0
  194. truss/tests/test_data/test_truss/config.yaml +4 -0
  195. truss/tests/test_data/test_truss/model/__init__.py +0 -0
  196. truss/tests/test_data/test_truss/model/dummy +0 -0
  197. truss/tests/test_data/test_truss/packages/__init__.py +0 -0
  198. truss/tests/test_data/test_truss/packages/test_package/__init__.py +0 -0
  199. truss/tests/test_data/test_truss_server_caching_truss/__init__.py +0 -0
  200. truss/tests/test_data/test_truss_server_caching_truss/model/__init__.py +0 -0
  201. truss/tests/test_data/test_truss_with_error/__init__.py +0 -0
  202. truss/tests/test_data/test_truss_with_error/config.yaml +4 -0
  203. truss/tests/test_data/test_truss_with_error/model/__init__.py +0 -0
  204. truss/tests/test_data/test_truss_with_error/model/model.py +8 -0
  205. truss/tests/test_data/test_truss_with_error/packages/__init__.py +0 -0
  206. truss/tests/test_data/test_truss_with_error/packages/helpers_1.py +5 -0
  207. truss/tests/test_data/test_truss_with_error/packages/helpers_2.py +2 -0
  208. truss/tests/test_docker.py +2 -1
  209. truss/tests/test_model_inference.py +1340 -292
  210. truss/tests/test_model_schema.py +33 -26
  211. truss/tests/test_testing_utilities_for_other_tests.py +50 -5
  212. truss/tests/test_truss_gatherer.py +3 -5
  213. truss/tests/test_truss_handle.py +62 -59
  214. truss/tests/test_util.py +2 -1
  215. truss/tests/test_validation.py +15 -13
  216. truss/tests/trt_llm/test_trt_llm_config.py +41 -0
  217. truss/tests/trt_llm/test_validation.py +91 -0
  218. truss/tests/util/test_config_checks.py +40 -0
  219. truss/tests/util/test_env_vars.py +14 -0
  220. truss/tests/util/test_path.py +10 -23
  221. truss/trt_llm/config_checks.py +43 -0
  222. truss/trt_llm/validation.py +42 -0
  223. truss/truss_handle/__init__.py +0 -0
  224. truss/truss_handle/build.py +122 -0
  225. truss/{decorators.py → truss_handle/decorators.py} +1 -1
  226. truss/truss_handle/patch/__init__.py +0 -0
  227. truss/{patch → truss_handle/patch}/calc_patch.py +146 -92
  228. truss/{types.py → truss_handle/patch/custom_types.py} +35 -27
  229. truss/{patch → truss_handle/patch}/dir_signature.py +1 -1
  230. truss/truss_handle/patch/hash.py +71 -0
  231. truss/{patch → truss_handle/patch}/local_truss_patch_applier.py +6 -4
  232. truss/truss_handle/patch/signature.py +22 -0
  233. truss/truss_handle/patch/truss_dir_patch_applier.py +87 -0
  234. truss/{readme_generator.py → truss_handle/readme_generator.py} +3 -2
  235. truss/{truss_gatherer.py → truss_handle/truss_gatherer.py} +3 -2
  236. truss/{truss_handle.py → truss_handle/truss_handle.py} +174 -78
  237. truss/util/.truss_ignore +3 -0
  238. truss/{docker.py → util/docker.py} +6 -2
  239. truss/util/download.py +6 -15
  240. truss/util/env_vars.py +41 -0
  241. truss/util/log_utils.py +52 -0
  242. truss/util/path.py +20 -20
  243. truss/util/requirements.py +11 -0
  244. {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/METADATA +18 -16
  245. truss-0.60.0.dist-info/RECORD +324 -0
  246. {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/WHEEL +1 -1
  247. truss-0.60.0.dist-info/entry_points.txt +4 -0
  248. truss_chains/__init__.py +71 -0
  249. truss_chains/definitions.py +756 -0
  250. truss_chains/deployment/__init__.py +0 -0
  251. truss_chains/deployment/code_gen.py +816 -0
  252. truss_chains/deployment/deployment_client.py +871 -0
  253. truss_chains/framework.py +1480 -0
  254. truss_chains/public_api.py +231 -0
  255. truss_chains/py.typed +0 -0
  256. truss_chains/pydantic_numpy.py +131 -0
  257. truss_chains/reference_code/reference_chainlet.py +34 -0
  258. truss_chains/reference_code/reference_model.py +10 -0
  259. truss_chains/remote_chainlet/__init__.py +0 -0
  260. truss_chains/remote_chainlet/model_skeleton.py +60 -0
  261. truss_chains/remote_chainlet/stub.py +380 -0
  262. truss_chains/remote_chainlet/utils.py +332 -0
  263. truss_chains/streaming.py +378 -0
  264. truss_chains/utils.py +178 -0
  265. CODE_OF_CONDUCT.md +0 -131
  266. CONTRIBUTING.md +0 -48
  267. README.md +0 -137
  268. context_builder.Dockerfile +0 -24
  269. truss/blob/blob_backend.py +0 -10
  270. truss/blob/blob_backend_registry.py +0 -23
  271. truss/blob/http_public_blob_backend.py +0 -23
  272. truss/build/__init__.py +0 -2
  273. truss/build/build.py +0 -143
  274. truss/build/configure.py +0 -63
  275. truss/cli/__init__.py +0 -2
  276. truss/cli/console.py +0 -5
  277. truss/cli/create.py +0 -5
  278. truss/config/trt_llm.py +0 -81
  279. truss/constants.py +0 -61
  280. truss/model_inference.py +0 -123
  281. truss/patch/types.py +0 -30
  282. truss/pytest.ini +0 -7
  283. truss/server/common/errors.py +0 -100
  284. truss/server/common/termination_handler_middleware.py +0 -64
  285. truss/server/common/truss_server.py +0 -389
  286. truss/server/control/patch/model_code_patch_applier.py +0 -46
  287. truss/server/control/patch/requirement_name_identifier.py +0 -17
  288. truss/server/inference_server.py +0 -29
  289. truss/server/model_wrapper.py +0 -434
  290. truss/server/shared/logging.py +0 -81
  291. truss/templates/trtllm/model/model.py +0 -97
  292. truss/templates/trtllm/packages/build_engine_utils.py +0 -34
  293. truss/templates/trtllm/packages/constants.py +0 -11
  294. truss/templates/trtllm/packages/schema.py +0 -216
  295. truss/templates/trtllm/packages/tensorrt_llm_model_repository/ensemble/config.pbtxt +0 -246
  296. truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/1/model.py +0 -181
  297. truss/templates/trtllm/packages/tensorrt_llm_model_repository/postprocessing/config.pbtxt +0 -64
  298. truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/1/model.py +0 -260
  299. truss/templates/trtllm/packages/tensorrt_llm_model_repository/preprocessing/config.pbtxt +0 -99
  300. truss/templates/trtllm/packages/tensorrt_llm_model_repository/tensorrt_llm/config.pbtxt +0 -208
  301. truss/templates/trtllm/packages/triton_client.py +0 -150
  302. truss/templates/trtllm/packages/utils.py +0 -43
  303. truss/test_data/context_builder_image_test/test.py +0 -4
  304. truss/test_data/happy.ipynb +0 -54
  305. truss/test_data/model_load_failure_test/config.yaml +0 -2
  306. truss/test_data/test_concurrency_truss/config.yaml +0 -2
  307. truss/test_data/test_streaming_async_generator_truss/config.yaml +0 -2
  308. truss/test_data/test_streaming_truss/config.yaml +0 -3
  309. truss/test_data/test_truss/config.yaml +0 -2
  310. truss/tests/server/common/test_termination_handler_middleware.py +0 -93
  311. truss/tests/server/control/test_model_container_patch_applier.py +0 -203
  312. truss/tests/server/core/server/common/test_util.py +0 -19
  313. truss/tests/server/test_model_wrapper.py +0 -87
  314. truss/util/data_structures.py +0 -16
  315. truss-0.10.0rc1.dist-info/RECORD +0 -216
  316. truss-0.10.0rc1.dist-info/entry_points.txt +0 -3
  317. truss/{server/shared → base}/__init__.py +0 -0
  318. truss/{server → templates/control}/control/helpers/context_managers.py +0 -0
  319. truss/{server/control → templates/control/control/helpers}/errors.py +0 -0
  320. truss/{server/control/patch → templates/control/control/helpers/truss_patch}/__init__.py +0 -0
  321. truss/{server/control/patch → templates/control/control/helpers/truss_patch}/system_packages.py +0 -0
  322. truss/{test_data/annotated_types_truss/model → templates/server}/__init__.py +0 -0
  323. truss/{server → templates/server}/common/__init__.py +0 -0
  324. truss/{test_data/gcs_fix/model → templates/shared}/__init__.py +0 -0
  325. truss/templates/{trtllm → trtllm-briton}/README.md +0 -0
  326. truss/{test_data/server_conformance_test_truss/model → tests/test_data}/__init__.py +0 -0
  327. truss/{test_data/test_basic_truss/model → tests/test_data/annotated_types_truss}/__init__.py +0 -0
  328. truss/{test_data → tests/test_data}/annotated_types_truss/config.yaml +0 -0
  329. truss/{test_data/test_requirements_file_truss → tests/test_data/annotated_types_truss}/model/__init__.py +0 -0
  330. truss/{test_data → tests/test_data}/annotated_types_truss/model/model.py +0 -0
  331. truss/{test_data → tests/test_data}/auto-mpg.data +0 -0
  332. truss/{test_data → tests/test_data}/context_builder_image_test/Dockerfile +0 -0
  333. truss/{test_data/test_truss/model → tests/test_data/context_builder_image_test}/__init__.py +0 -0
  334. truss/{test_data/test_truss_server_caching_truss/model → tests/test_data/gcs_fix}/__init__.py +0 -0
  335. truss/{test_data → tests/test_data}/gcs_fix/config.yaml +0 -0
  336. truss/tests/{local → test_data/gcs_fix/model}/__init__.py +0 -0
  337. truss/{test_data → tests/test_data}/gcs_fix/model/model.py +0 -0
  338. truss/{test_data/test_truss/model/dummy → tests/test_data/model_load_failure_test/__init__.py} +0 -0
  339. truss/{test_data → tests/test_data}/model_load_failure_test/model/model.py +0 -0
  340. truss/{test_data → tests/test_data}/pima-indians-diabetes.csv +0 -0
  341. truss/{test_data → tests/test_data}/readme_int_example.md +0 -0
  342. truss/{test_data → tests/test_data}/readme_no_example.md +0 -0
  343. truss/{test_data → tests/test_data}/readme_str_example.md +0 -0
  344. truss/{test_data → tests/test_data}/server_conformance_test_truss/config.yaml +0 -0
  345. truss/{test_data → tests/test_data}/test_async_truss/config.yaml +0 -0
  346. truss/{test_data → tests/test_data}/test_async_truss/model/model.py +3 -3
  347. truss/{test_data → tests/test_data}/test_basic_truss/model/model.py +0 -0
  348. truss/{test_data → tests/test_data}/test_concurrency_truss/model/model.py +0 -0
  349. truss/{test_data/test_requirements_file_truss → tests/test_data/test_pyantic_v1}/config.yaml +0 -0
  350. truss/{test_data → tests/test_data}/test_requirements_file_truss/requirements.txt +0 -0
  351. truss/{test_data → tests/test_data}/test_streaming_read_timeout/config.yaml +0 -0
  352. truss/{test_data → tests/test_data}/test_streaming_read_timeout/model/model.py +0 -0
  353. truss/{test_data → tests/test_data}/test_streaming_truss/model/model.py +0 -0
  354. truss/{test_data → tests/test_data}/test_streaming_truss_with_error/config.yaml +0 -0
  355. truss/{test_data → tests/test_data}/test_truss/examples.yaml +0 -0
  356. truss/{test_data → tests/test_data}/test_truss/model/model.py +0 -0
  357. truss/{test_data → tests/test_data}/test_truss/packages/test_package/test.py +0 -0
  358. truss/{test_data → tests/test_data}/test_truss_server_caching_truss/config.yaml +0 -0
  359. truss/{test_data → tests/test_data}/test_truss_server_caching_truss/model/model.py +0 -0
  360. truss/{patch → truss_handle/patch}/constants.py +0 -0
  361. truss/{notebook.py → util/notebook.py} +0 -0
  362. {truss-0.10.0rc1.dist-info → truss-0.60.0.dist-info}/LICENSE +0 -0
@@ -1,260 +0,0 @@
1
- # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions
5
- # are met:
6
- # * Redistributions of source code must retain the above copyright
7
- # notice, this list of conditions and the following disclaimer.
8
- # * Redistributions in binary form must reproduce the above copyright
9
- # notice, this list of conditions and the following disclaimer in the
10
- # documentation and/or other materials provided with the distribution.
11
- # * Neither the name of NVIDIA CORPORATION nor the names of its
12
- # contributors may be used to endorse or promote products derived
13
- # from this software without specific prior written permission.
14
- #
15
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
-
27
- import csv
28
- import json
29
- import os
30
- from typing import List
31
-
32
- import numpy as np
33
- import triton_python_backend_utils as pb_utils
34
- from transformers import AutoTokenizer, LlamaTokenizer, T5Tokenizer
35
-
36
-
37
- class TritonPythonModel:
38
- """Your Python model must use the same class name. Every Python model
39
- that is created must have "TritonPythonModel" as the class name.
40
- """
41
-
42
- def initialize(self, args):
43
- """`initialize` is called only once when the model is being loaded.
44
- Implementing `initialize` function is optional. This function allows
45
- the model to initialize any state associated with this model.
46
- Parameters
47
- ----------
48
- args : dict
49
- Both keys and values are strings. The dictionary keys and values are:
50
- * model_config: A JSON string containing the model configuration
51
- * model_instance_kind: A string containing model instance kind
52
- * model_instance_device_id: A string containing model instance device ID
53
- * model_repository: Model repository path
54
- * model_version: Model version
55
- * model_name: Model name
56
- """
57
- # Parse model configs
58
- model_config = json.loads(args["model_config"])
59
- # NOTE: Keep this in sync with the truss model.py variable
60
- tokenizer_dir = os.environ["TRITON_TOKENIZER_REPOSITORY"]
61
- tokenizer_type = model_config["parameters"]["tokenizer_type"]["string_value"]
62
- self.add_special_tokens = model_config["parameters"].get(
63
- "add_special_tokens", {"string_value": "false"}
64
- )["string_value"].lower() in ["true", "1", "t", "y", "yes"]
65
-
66
- if tokenizer_type == "t5":
67
- self.tokenizer = T5Tokenizer(vocab_file=tokenizer_dir, padding_side="left")
68
- elif tokenizer_type == "auto":
69
- self.tokenizer = AutoTokenizer.from_pretrained(
70
- tokenizer_dir, padding_side="left"
71
- )
72
- elif tokenizer_type == "llama":
73
- self.tokenizer = LlamaTokenizer.from_pretrained(
74
- tokenizer_dir, legacy=False, padding_side="left"
75
- )
76
- else:
77
- raise AttributeError(f"Unexpected tokenizer type: {tokenizer_type}")
78
- self.tokenizer.pad_token = self.tokenizer.eos_token
79
-
80
- self.pad_id = self.tokenizer.encode(
81
- self.tokenizer.pad_token, add_special_tokens=False
82
- )[0]
83
-
84
- # Parse model output configs and convert Triton types to numpy types
85
- input_names = [
86
- "INPUT_ID",
87
- "REQUEST_INPUT_LEN",
88
- "BAD_WORDS_IDS",
89
- "STOP_WORDS_IDS",
90
- ]
91
- for input_name in input_names:
92
- setattr(
93
- self,
94
- input_name.lower() + "_dtype",
95
- pb_utils.triton_string_to_numpy(
96
- pb_utils.get_output_config_by_name(model_config, input_name)[
97
- "data_type"
98
- ]
99
- ),
100
- )
101
-
102
- def execute(self, requests):
103
- """`execute` must be implemented in every Python model. `execute`
104
- function receives a list of pb_utils.InferenceRequest as the only
105
- argument. This function is called when an inference is requested
106
- for this model. Depending on the batching configuration (e.g. Dynamic
107
- Batching) used, `requests` may contain multiple requests. Every
108
- Python model, must create one pb_utils.InferenceResponse for every
109
- pb_utils.InferenceRequest in `requests`. If there is an error, you can
110
- set the error argument when creating a pb_utils.InferenceResponse.
111
- Parameters
112
- ----------
113
- requests : list
114
- A list of pb_utils.InferenceRequest
115
- Returns
116
- -------
117
- list
118
- A list of pb_utils.InferenceResponse. The length of this list must
119
- be the same as `requests`
120
- """
121
-
122
- responses = []
123
-
124
- # Every Python backend must iterate over everyone of the requests
125
- # and create a pb_utils.InferenceResponse for each of them.
126
- for idx, request in enumerate(requests):
127
- # Get input tensors
128
- query = pb_utils.get_input_tensor_by_name(request, "QUERY").as_numpy()
129
- request_output_len = pb_utils.get_input_tensor_by_name(
130
- request, "REQUEST_OUTPUT_LEN"
131
- ).as_numpy()
132
-
133
- bad_words_dict = pb_utils.get_input_tensor_by_name(
134
- request, "BAD_WORDS_DICT"
135
- ).as_numpy()
136
- stop_words_dict = pb_utils.get_input_tensor_by_name(
137
- request, "STOP_WORDS_DICT"
138
- ).as_numpy()
139
-
140
- # Preprocessing input data.
141
- input_id, request_input_len = self._create_request(query)
142
- bad_words = self._to_word_list_format(bad_words_dict)
143
- stop_words = self._to_word_list_format(stop_words_dict)
144
-
145
- # Create output tensors. You need pb_utils.Tensor
146
- # objects to create pb_utils.InferenceResponse.
147
- input_id_tensor = pb_utils.Tensor(
148
- "INPUT_ID", np.array(input_id).astype(self.input_id_dtype)
149
- )
150
- request_input_len_tensor = pb_utils.Tensor(
151
- "REQUEST_INPUT_LEN",
152
- np.array(request_input_len).astype(self.request_input_len_dtype),
153
- )
154
- request_output_len_tensor = pb_utils.Tensor(
155
- "REQUEST_OUTPUT_LEN", request_output_len
156
- )
157
- bad_words_ids_tensor = pb_utils.Tensor("BAD_WORDS_IDS", bad_words)
158
- stop_words_ids_tensor = pb_utils.Tensor("STOP_WORDS_IDS", stop_words)
159
-
160
- # Create InferenceResponse. You can set an error here in case
161
- # there was a problem with handling this inference request.
162
- # Below is an example of how you can set errors in inference
163
- # response:
164
- #
165
- # pb_utils.InferenceResponse(
166
- # output_tensors=..., TritonError("An error occurred"))
167
- inference_response = pb_utils.InferenceResponse(
168
- output_tensors=[
169
- input_id_tensor,
170
- bad_words_ids_tensor,
171
- stop_words_ids_tensor,
172
- request_input_len_tensor,
173
- request_output_len_tensor,
174
- ]
175
- )
176
- responses.append(inference_response)
177
-
178
- # You should return a list of pb_utils.InferenceResponse. Length
179
- # of this list must match the length of `requests` list.
180
- return responses
181
-
182
- def finalize(self):
183
- """`finalize` is called only once when the model is being unloaded.
184
- Implementing `finalize` function is optional. This function allows
185
- the model to perform any necessary clean ups before exit.
186
- """
187
- print("Cleaning up...")
188
-
189
- def _create_request(self, query):
190
- """
191
- query : batch string (2D numpy array)
192
- """
193
- start_ids = [
194
- np.array(
195
- self.tokenizer.encode(
196
- s[0].decode(), add_special_tokens=self.add_special_tokens
197
- )
198
- ).astype(int)
199
- for s in query
200
- ]
201
- start_lengths = np.array([[len(ids)] for ids in start_ids]).astype(int)
202
-
203
- max_len = 0
204
- for seq in start_ids:
205
- max_len = max(max_len, seq.shape[0])
206
- start_ids = np.stack(
207
- [
208
- np.pad(
209
- seq,
210
- (0, max_len - seq.shape[0]),
211
- "constant",
212
- constant_values=(0, self.pad_id),
213
- )
214
- for seq in start_ids
215
- ]
216
- )
217
-
218
- return start_ids, start_lengths
219
-
220
- def _to_word_list_format(self, word_dict: List[List[str]]):
221
- """
222
- format of word_dict
223
- len(word_dict) should be same to batch_size
224
- word_dict[i] means the words for batch i
225
- len(word_dict[i]) must be 1, which means it only contains 1 string
226
- This string can contains several sentences and split by ",".
227
- For example, if word_dict[2] = " I am happy, I am sad", then this function will return
228
- the ids for two short sentences " I am happy" and " I am sad".
229
- """
230
- assert self.tokenizer is not None, "need to set tokenizer"
231
-
232
- flat_ids = []
233
- offsets = []
234
- for word_dict_item in word_dict:
235
- item_flat_ids = []
236
- item_offsets = []
237
-
238
- if isinstance(word_dict_item[0], bytes):
239
- word_dict_item = [word_dict_item[0].decode()]
240
-
241
- words = list(csv.reader(word_dict_item))[0]
242
- for word in words:
243
- ids = self.tokenizer.encode(word)
244
-
245
- if len(ids) == 0:
246
- continue
247
-
248
- item_flat_ids += ids
249
- item_offsets.append(len(ids))
250
-
251
- flat_ids.append(np.array(item_flat_ids))
252
- offsets.append(np.cumsum(np.array(item_offsets)))
253
-
254
- pad_to = max(1, max(len(ids) for ids in flat_ids))
255
-
256
- for i, (ids, offs) in enumerate(zip(flat_ids, offsets)):
257
- flat_ids[i] = np.pad(ids, (0, pad_to - len(ids)), constant_values=0)
258
- offsets[i] = np.pad(offs, (0, pad_to - len(offs)), constant_values=-1)
259
-
260
- return np.array([flat_ids, offsets], dtype="int32").transpose((1, 0, 2))
@@ -1,99 +0,0 @@
1
- # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions
5
- # are met:
6
- # * Redistributions of source code must retain the above copyright
7
- # notice, this list of conditions and the following disclaimer.
8
- # * Redistributions in binary form must reproduce the above copyright
9
- # notice, this list of conditions and the following disclaimer in the
10
- # documentation and/or other materials provided with the distribution.
11
- # * Neither the name of NVIDIA CORPORATION nor the names of its
12
- # contributors may be used to endorse or promote products derived
13
- # from this software without specific prior written permission.
14
- #
15
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
-
27
- name: "preprocessing"
28
- backend: "python"
29
- max_batch_size: 2048
30
- input [
31
- {
32
- name: "QUERY"
33
- data_type: TYPE_STRING
34
- dims: [ -1 ]
35
- },
36
- {
37
- name: "BAD_WORDS_DICT"
38
- data_type: TYPE_STRING
39
- dims: [ -1 ]
40
- },
41
- {
42
- name: "STOP_WORDS_DICT"
43
- data_type: TYPE_STRING
44
- dims: [ -1 ]
45
- },
46
- {
47
- name: "REQUEST_OUTPUT_LEN"
48
- data_type: TYPE_UINT32
49
- dims: [ -1 ]
50
- }
51
- ]
52
- output [
53
- {
54
- name: "INPUT_ID"
55
- data_type: TYPE_INT32
56
- dims: [ -1 ]
57
- },
58
- {
59
- name: "REQUEST_INPUT_LEN"
60
- data_type: TYPE_INT32
61
- dims: [ 1 ]
62
- },
63
- {
64
- name: "BAD_WORDS_IDS"
65
- data_type: TYPE_INT32
66
- dims: [ 2, -1 ]
67
- },
68
- {
69
- name: "STOP_WORDS_IDS"
70
- data_type: TYPE_INT32
71
- dims: [ 2, -1 ]
72
- },
73
- {
74
- name: "REQUEST_OUTPUT_LEN"
75
- data_type: TYPE_UINT32
76
- dims: [ -1 ]
77
- }
78
- ]
79
-
80
- parameters {
81
- key: "tokenizer_dir"
82
- value: {
83
- string_value: "NousResearch/Llama-2-7b-hf"
84
- }
85
- }
86
-
87
- parameters {
88
- key: "tokenizer_type"
89
- value: {
90
- string_value: "auto"
91
- }
92
- }
93
-
94
- instance_group [
95
- {
96
- count: 1
97
- kind: KIND_CPU
98
- }
99
- ]
@@ -1,208 +0,0 @@
1
- # Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- #
3
- # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions
5
- # are met:
6
- # * Redistributions of source code must retain the above copyright
7
- # notice, this list of conditions and the following disclaimer.
8
- # * Redistributions in binary form must reproduce the above copyright
9
- # notice, this list of conditions and the following disclaimer in the
10
- # documentation and/or other materials provided with the distribution.
11
- # * Neither the name of NVIDIA CORPORATION nor the names of its
12
- # contributors may be used to endorse or promote products derived
13
- # from this software without specific prior written permission.
14
- #
15
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
-
27
- name: "tensorrt_llm"
28
- backend: "tensorrtllm"
29
- max_batch_size: 2048
30
-
31
- model_transaction_policy {
32
- decoupled: True
33
- }
34
-
35
- input [
36
- {
37
- name: "input_ids"
38
- data_type: TYPE_INT32
39
- dims: [ -1 ]
40
- },
41
- {
42
- name: "input_lengths"
43
- data_type: TYPE_INT32
44
- dims: [ 1 ]
45
- reshape: { shape: [ ] }
46
- },
47
- {
48
- name: "request_output_len"
49
- data_type: TYPE_UINT32
50
- dims: [ 1 ]
51
- },
52
- {
53
- name: "end_id"
54
- data_type: TYPE_UINT32
55
- dims: [ 1 ]
56
- reshape: { shape: [ ] }
57
- optional: true
58
- },
59
- {
60
- name: "pad_id"
61
- data_type: TYPE_UINT32
62
- dims: [ 1 ]
63
- reshape: { shape: [ ] }
64
- optional: true
65
- },
66
- {
67
- name: "beam_width"
68
- data_type: TYPE_UINT32
69
- dims: [ 1 ]
70
- reshape: { shape: [ ] }
71
- optional: true
72
- },
73
- {
74
- name: "temperature"
75
- data_type: TYPE_FP32
76
- dims: [ 1 ]
77
- reshape: { shape: [ ] }
78
- optional: true
79
- },
80
- {
81
- name: "runtime_top_k"
82
- data_type: TYPE_UINT32
83
- dims: [ 1 ]
84
- reshape: { shape: [ ] }
85
- optional: true
86
- },
87
- {
88
- name: "runtime_top_p"
89
- data_type: TYPE_FP32
90
- dims: [ 1 ]
91
- reshape: { shape: [ ] }
92
- optional: true
93
- },
94
- {
95
- name: "len_penalty"
96
- data_type: TYPE_FP32
97
- dims: [ 1 ]
98
- reshape: { shape: [ ] }
99
- optional: true
100
- },
101
- {
102
- name: "repetition_penalty"
103
- data_type: TYPE_FP32
104
- dims: [ 1 ]
105
- reshape: { shape: [ ] }
106
- optional: true
107
- },
108
- {
109
- name: "min_length"
110
- data_type: TYPE_UINT32
111
- dims: [ 1 ]
112
- reshape: { shape: [ ] }
113
- optional: true
114
- },
115
- {
116
- name: "presence_penalty"
117
- data_type: TYPE_FP32
118
- dims: [ 1 ]
119
- reshape: { shape: [ ] }
120
- optional: true
121
- },
122
- {
123
- name: "random_seed"
124
- data_type: TYPE_UINT64
125
- dims: [ 1 ]
126
- reshape: { shape: [ ] }
127
- optional: true
128
- },
129
- {
130
- name: "stop"
131
- data_type: TYPE_BOOL
132
- dims: [ 1 ]
133
- optional: true
134
- },
135
- {
136
- name: "streaming"
137
- data_type: TYPE_BOOL
138
- dims: [ 1 ]
139
- optional: true
140
- }
141
- ]
142
- output [
143
- {
144
- name: "output_ids"
145
- data_type: TYPE_INT32
146
- dims: [ -1, -1 ]
147
- }
148
- ]
149
- instance_group [
150
- {
151
- count: 1
152
- kind : KIND_CPU
153
- }
154
- ]
155
- parameters: {
156
- key: "max_beam_width"
157
- value: {
158
- string_value: "1"
159
- }
160
- }
161
- parameters: {
162
- key: "FORCE_CPU_ONLY_INPUT_TENSORS"
163
- value: {
164
- string_value: "no"
165
- }
166
- }
167
- parameters: {
168
- key: "gpt_model_type"
169
- value: {
170
- string_value: "inflight_fused_batching"
171
- }
172
- }
173
- parameters: {
174
- key: "gpt_model_path"
175
- value: {
176
- string_value: "/app/packages/tensorrt_llm_model_repository/tensorrt_llm/1"
177
- }
178
- }
179
- parameters: {
180
- key: "max_tokens_in_paged_kv_cache"
181
- value: {
182
- string_value: "100000"
183
- }
184
- }
185
- parameters: {
186
- key: "batch_scheduler_policy"
187
- value: {
188
- string_value: "max_utilization"
189
- }
190
- }
191
- parameters: {
192
- key: "kv_cache_free_gpu_mem_fraction"
193
- value: {
194
- string_value: "0.9"
195
- }
196
- }
197
- parameters: {
198
- key: "max_num_sequences"
199
- value: {
200
- string_value: "2048"
201
- }
202
- }
203
- parameters: {
204
- key: "enable_trt_overlap"
205
- value: {
206
- string_value: "False"
207
- }
208
- }