xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (373)
  1. xinference/_compat.py +24 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +219 -77
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +6 -1
  7. xinference/core/model.py +124 -34
  8. xinference/core/supervisor.py +180 -12
  9. xinference/core/utils.py +73 -4
  10. xinference/core/worker.py +102 -4
  11. xinference/deploy/cmdline.py +3 -1
  12. xinference/deploy/test/test_cmdline.py +56 -0
  13. xinference/isolation.py +24 -0
  14. xinference/model/audio/__init__.py +12 -0
  15. xinference/model/audio/core.py +37 -4
  16. xinference/model/audio/cosyvoice.py +39 -6
  17. xinference/model/audio/f5tts.py +200 -0
  18. xinference/model/audio/f5tts_mlx.py +260 -0
  19. xinference/model/audio/fish_speech.py +70 -110
  20. xinference/model/audio/melotts.py +110 -0
  21. xinference/model/audio/model_spec.json +179 -3
  22. xinference/model/audio/model_spec_modelscope.json +27 -0
  23. xinference/model/audio/utils.py +32 -0
  24. xinference/model/audio/whisper.py +35 -10
  25. xinference/model/audio/whisper_mlx.py +208 -0
  26. xinference/model/embedding/core.py +322 -6
  27. xinference/model/embedding/model_spec.json +8 -1
  28. xinference/model/embedding/model_spec_modelscope.json +9 -1
  29. xinference/model/image/core.py +69 -1
  30. xinference/model/image/model_spec.json +145 -4
  31. xinference/model/image/model_spec_modelscope.json +150 -4
  32. xinference/model/image/stable_diffusion/core.py +50 -15
  33. xinference/model/llm/__init__.py +6 -2
  34. xinference/model/llm/llm_family.json +1055 -93
  35. xinference/model/llm/llm_family.py +15 -36
  36. xinference/model/llm/llm_family_modelscope.json +1031 -78
  37. xinference/model/llm/memory.py +1 -1
  38. xinference/model/llm/mlx/core.py +285 -47
  39. xinference/model/llm/sglang/core.py +2 -0
  40. xinference/model/llm/transformers/chatglm.py +9 -5
  41. xinference/model/llm/transformers/cogagent.py +272 -0
  42. xinference/model/llm/transformers/core.py +3 -0
  43. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  44. xinference/model/llm/transformers/qwen2_vl.py +12 -1
  45. xinference/model/llm/transformers/utils.py +16 -8
  46. xinference/model/llm/utils.py +55 -4
  47. xinference/model/llm/vllm/core.py +137 -12
  48. xinference/model/llm/vllm/xavier/__init__.py +13 -0
  49. xinference/model/llm/vllm/xavier/allocator.py +74 -0
  50. xinference/model/llm/vllm/xavier/block.py +111 -0
  51. xinference/model/llm/vllm/xavier/block_manager.py +71 -0
  52. xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
  53. xinference/model/llm/vllm/xavier/collective.py +74 -0
  54. xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
  55. xinference/model/llm/vllm/xavier/engine.py +247 -0
  56. xinference/model/llm/vllm/xavier/executor.py +134 -0
  57. xinference/model/llm/vllm/xavier/scheduler.py +438 -0
  58. xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
  59. xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
  60. xinference/model/llm/vllm/xavier/transfer.py +319 -0
  61. xinference/model/rerank/core.py +11 -4
  62. xinference/model/video/diffusers.py +14 -0
  63. xinference/model/video/model_spec.json +15 -0
  64. xinference/model/video/model_spec_modelscope.json +16 -0
  65. xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
  66. xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
  67. xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
  68. xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
  69. xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
  70. xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
  71. xinference/thirdparty/cosyvoice/bin/train.py +42 -8
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
  74. xinference/thirdparty/cosyvoice/cli/model.py +330 -80
  75. xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
  76. xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
  77. xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
  78. xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
  79. xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
  80. xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
  81. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
  84. xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
  85. xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  86. xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
  87. xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
  88. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
  89. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
  90. xinference/thirdparty/cosyvoice/utils/common.py +28 -1
  91. xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
  92. xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
  93. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
  94. xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
  95. xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
  96. xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
  97. xinference/thirdparty/f5_tts/api.py +166 -0
  98. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  99. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  100. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  101. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  102. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  103. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  104. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  105. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  106. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  107. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  108. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  109. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  110. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  111. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  112. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  113. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  114. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  115. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  116. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  117. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  118. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  119. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  120. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  121. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  122. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  123. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  124. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  125. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  126. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  127. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  128. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  129. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  130. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  131. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  132. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  133. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  134. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  135. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  136. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  137. xinference/thirdparty/f5_tts/train/README.md +77 -0
  138. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  139. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  140. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  141. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  142. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  143. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  144. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  145. xinference/thirdparty/f5_tts/train/train.py +75 -0
  146. xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
  147. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  148. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  149. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  150. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  151. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  152. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
  153. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  154. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  155. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
  156. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
  157. xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
  158. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
  159. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  160. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  161. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  162. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
  163. xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
  164. xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
  165. xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
  166. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  167. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  168. xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
  169. xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
  170. xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
  171. xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
  172. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
  173. xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
  174. xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
  175. xinference/thirdparty/fish_speech/tools/schema.py +170 -0
  176. xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
  177. xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
  178. xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
  179. xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
  180. xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
  181. xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
  182. xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
  183. xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
  184. xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
  185. xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
  186. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  187. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  188. xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
  189. xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
  190. xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
  191. xinference/thirdparty/matcha/utils/utils.py +2 -2
  192. xinference/thirdparty/melo/api.py +135 -0
  193. xinference/thirdparty/melo/app.py +61 -0
  194. xinference/thirdparty/melo/attentions.py +459 -0
  195. xinference/thirdparty/melo/commons.py +160 -0
  196. xinference/thirdparty/melo/configs/config.json +94 -0
  197. xinference/thirdparty/melo/data/example/metadata.list +20 -0
  198. xinference/thirdparty/melo/data_utils.py +413 -0
  199. xinference/thirdparty/melo/download_utils.py +67 -0
  200. xinference/thirdparty/melo/infer.py +25 -0
  201. xinference/thirdparty/melo/init_downloads.py +14 -0
  202. xinference/thirdparty/melo/losses.py +58 -0
  203. xinference/thirdparty/melo/main.py +36 -0
  204. xinference/thirdparty/melo/mel_processing.py +174 -0
  205. xinference/thirdparty/melo/models.py +1030 -0
  206. xinference/thirdparty/melo/modules.py +598 -0
  207. xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
  208. xinference/thirdparty/melo/monotonic_align/core.py +46 -0
  209. xinference/thirdparty/melo/preprocess_text.py +135 -0
  210. xinference/thirdparty/melo/split_utils.py +174 -0
  211. xinference/thirdparty/melo/text/__init__.py +35 -0
  212. xinference/thirdparty/melo/text/chinese.py +199 -0
  213. xinference/thirdparty/melo/text/chinese_bert.py +107 -0
  214. xinference/thirdparty/melo/text/chinese_mix.py +253 -0
  215. xinference/thirdparty/melo/text/cleaner.py +36 -0
  216. xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
  217. xinference/thirdparty/melo/text/cmudict.rep +129530 -0
  218. xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
  219. xinference/thirdparty/melo/text/english.py +284 -0
  220. xinference/thirdparty/melo/text/english_bert.py +39 -0
  221. xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
  222. xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
  223. xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
  224. xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
  225. xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
  226. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
  227. xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
  228. xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
  229. xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
  230. xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
  231. xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
  232. xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
  233. xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
  234. xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
  235. xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
  236. xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
  237. xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
  238. xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
  239. xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
  240. xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
  241. xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
  242. xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
  243. xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
  244. xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
  245. xinference/thirdparty/melo/text/french.py +94 -0
  246. xinference/thirdparty/melo/text/french_bert.py +39 -0
  247. xinference/thirdparty/melo/text/japanese.py +647 -0
  248. xinference/thirdparty/melo/text/japanese_bert.py +49 -0
  249. xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
  250. xinference/thirdparty/melo/text/korean.py +192 -0
  251. xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
  252. xinference/thirdparty/melo/text/spanish.py +122 -0
  253. xinference/thirdparty/melo/text/spanish_bert.py +39 -0
  254. xinference/thirdparty/melo/text/symbols.py +290 -0
  255. xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
  256. xinference/thirdparty/melo/train.py +635 -0
  257. xinference/thirdparty/melo/train.sh +19 -0
  258. xinference/thirdparty/melo/transforms.py +209 -0
  259. xinference/thirdparty/melo/utils.py +424 -0
  260. xinference/types.py +17 -1
  261. xinference/web/ui/build/asset-manifest.json +6 -6
  262. xinference/web/ui/build/index.html +1 -1
  263. xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
  264. xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
  265. xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
  266. xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
  267. xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
  268. xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
  269. xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
  270. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
  271. xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
  272. xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
  273. xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
  274. xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
  275. xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
  276. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
  277. xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
  278. xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
  279. xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
  280. xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
  281. xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
  282. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
  283. xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
  284. xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
  285. xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
  286. xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
  287. xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
  288. xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
  289. xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
  290. xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
  291. xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
  292. xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
  293. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
  294. xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
  295. xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
  296. xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
  297. xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
  298. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
  299. xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
  300. xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
  301. xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
  302. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
  303. xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
  304. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
  305. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
  306. xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
  307. xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
  308. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
  309. xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
  310. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
  311. xinference/web/ui/node_modules/.package-lock.json +67 -3
  312. xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
  313. xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
  314. xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
  315. xinference/web/ui/node_modules/i18next/package.json +129 -0
  316. xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
  317. xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
  318. xinference/web/ui/node_modules/react-i18next/package.json +162 -0
  319. xinference/web/ui/node_modules/void-elements/package.json +34 -0
  320. xinference/web/ui/package-lock.json +69 -3
  321. xinference/web/ui/package.json +2 -0
  322. xinference/web/ui/src/locales/en.json +186 -0
  323. xinference/web/ui/src/locales/zh.json +186 -0
  324. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
  325. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
  326. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
  327. xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
  328. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  329. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  330. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  331. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  332. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  333. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  334. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  335. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  336. xinference/thirdparty/fish_speech/tools/api.py +0 -440
  337. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  338. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  339. xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
  340. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  341. xinference/thirdparty/fish_speech/tools/webui.py +0 -485
  342. xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
  343. xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
  344. xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
  345. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  346. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
  347. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
  348. xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
  349. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
  350. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
  351. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
  352. xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
  353. xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
  354. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
  355. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
  356. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
  357. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  358. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
  359. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
  360. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
  361. xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
  362. xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
  363. xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
  364. xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
  365. /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
  366. /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
  367. /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
  368. /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
  369. /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
  370. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
  371. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
  372. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
  373. {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: bdist_wheel (0.45.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,8 +0,0 @@
1
- # TODO 跟export_jit一样的逻辑,完成flow部分的estimator的onnx导出。
2
- # tensorrt的安装方式,再这里写一下步骤提示如下,如果没有安装,那么不要执行这个脚本,提示用户先安装,不给选择
3
- try:
4
- import tensorrt
5
- except ImportError:
6
- print('step1, 下载\n step2. 解压,安装whl,')
7
- # 安装命令里tensosrt的根目录用环境变量导入,比如os.environ['tensorrt_root_dir']/bin/exetrace,然后python里subprocess里执行导出命令
8
- # 后面我会在run.sh里写好执行命令 tensorrt_root_dir=xxxx python cosyvoice/bin/export_trt.py --model_dir xxx
File without changes
@@ -1,440 +0,0 @@
1
- import base64
2
- import io
3
- import json
4
- import queue
5
- import random
6
- import sys
7
- import traceback
8
- import wave
9
- from argparse import ArgumentParser
10
- from http import HTTPStatus
11
- from pathlib import Path
12
- from typing import Annotated, Any, Literal, Optional
13
-
14
- import numpy as np
15
- import ormsgpack
16
- # import pyrootutils
17
- import soundfile as sf
18
- import torch
19
- import torchaudio
20
- # from baize.datastructures import ContentType
21
- # from kui.asgi import (
22
- # Body,
23
- # FactoryClass,
24
- # HTTPException,
25
- # HttpRequest,
26
- # HttpView,
27
- # JSONResponse,
28
- # Kui,
29
- # OpenAPI,
30
- # StreamResponse,
31
- # )
32
- # from kui.asgi.routing import MultimethodRoutes
33
- from loguru import logger
34
- from pydantic import BaseModel, Field, conint
35
-
36
- # pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
37
-
38
- # from fish_speech.models.vqgan.lit_module import VQGAN
39
- from fish_speech.models.vqgan.modules.firefly import FireflyArchitecture
40
- from fish_speech.text.chn_text_norm.text import Text as ChnNormedText
41
- from fish_speech.utils import autocast_exclude_mps
42
- from tools.commons import ServeReferenceAudio, ServeTTSRequest
43
- from tools.file import AUDIO_EXTENSIONS, audio_to_bytes, list_files, read_ref_text
44
- from tools.llama.generate import (
45
- GenerateRequest,
46
- GenerateResponse,
47
- WrappedGenerateResponse,
48
- launch_thread_safe_queue,
49
- )
50
- from tools.vqgan.inference import load_model as load_decoder_model
51
-
52
-
53
- def wav_chunk_header(sample_rate=44100, bit_depth=16, channels=1):
54
- buffer = io.BytesIO()
55
-
56
- with wave.open(buffer, "wb") as wav_file:
57
- wav_file.setnchannels(channels)
58
- wav_file.setsampwidth(bit_depth // 8)
59
- wav_file.setframerate(sample_rate)
60
-
61
- wav_header_bytes = buffer.getvalue()
62
- buffer.close()
63
- return wav_header_bytes
64
-
65
-
66
- # Define utils for web server
67
- # async def http_execption_handler(exc: HTTPException):
68
- # return JSONResponse(
69
- # dict(
70
- # statusCode=exc.status_code,
71
- # message=exc.content,
72
- # error=HTTPStatus(exc.status_code).phrase,
73
- # ),
74
- # exc.status_code,
75
- # exc.headers,
76
- # )
77
-
78
-
79
- async def other_exception_handler(exc: "Exception"):
80
- traceback.print_exc()
81
-
82
- status = HTTPStatus.INTERNAL_SERVER_ERROR
83
- return JSONResponse(
84
- dict(statusCode=status, message=str(exc), error=status.phrase),
85
- status,
86
- )
87
-
88
-
89
- def load_audio(reference_audio, sr):
90
- if len(reference_audio) > 255 or not Path(reference_audio).exists():
91
- audio_data = reference_audio
92
- reference_audio = io.BytesIO(audio_data)
93
-
94
- waveform, original_sr = torchaudio.load(
95
- reference_audio, backend="sox" if sys.platform == "linux" else "soundfile"
96
- )
97
-
98
- if waveform.shape[0] > 1:
99
- waveform = torch.mean(waveform, dim=0, keepdim=True)
100
-
101
- if original_sr != sr:
102
- resampler = torchaudio.transforms.Resample(orig_freq=original_sr, new_freq=sr)
103
- waveform = resampler(waveform)
104
-
105
- audio = waveform.squeeze().numpy()
106
- return audio
107
-
108
-
109
- def encode_reference(*, decoder_model, reference_audio, enable_reference_audio):
110
- if enable_reference_audio and reference_audio is not None:
111
- # Load audios, and prepare basic info here
112
- reference_audio_content = load_audio(
113
- reference_audio, decoder_model.spec_transform.sample_rate
114
- )
115
-
116
- audios = torch.from_numpy(reference_audio_content).to(decoder_model.device)[
117
- None, None, :
118
- ]
119
- audio_lengths = torch.tensor(
120
- [audios.shape[2]], device=decoder_model.device, dtype=torch.long
121
- )
122
- logger.info(
123
- f"Loaded audio with {audios.shape[2] / decoder_model.spec_transform.sample_rate:.2f} seconds"
124
- )
125
-
126
- # VQ Encoder
127
- if isinstance(decoder_model, FireflyArchitecture):
128
- prompt_tokens = decoder_model.encode(audios, audio_lengths)[0][0]
129
-
130
- logger.info(f"Encoded prompt: {prompt_tokens.shape}")
131
- else:
132
- prompt_tokens = None
133
- logger.info("No reference audio provided")
134
-
135
- return prompt_tokens
136
-
137
-
138
- def decode_vq_tokens(
139
- *,
140
- decoder_model,
141
- codes,
142
- ):
143
- feature_lengths = torch.tensor([codes.shape[1]], device=decoder_model.device)
144
- logger.info(f"VQ features: {codes.shape}")
145
-
146
- if isinstance(decoder_model, FireflyArchitecture):
147
- # VQGAN Inference
148
- return decoder_model.decode(
149
- indices=codes[None],
150
- feature_lengths=feature_lengths,
151
- )[0].squeeze()
152
-
153
- raise ValueError(f"Unknown model type: {type(decoder_model)}")
154
-
155
-
156
- # routes = MultimethodRoutes(base_class=HttpView)
157
-
158
-
159
- def get_content_type(audio_format):
160
- if audio_format == "wav":
161
- return "audio/wav"
162
- elif audio_format == "flac":
163
- return "audio/flac"
164
- elif audio_format == "mp3":
165
- return "audio/mpeg"
166
- else:
167
- return "application/octet-stream"
168
-
169
-
170
- @torch.inference_mode()
171
- def inference(req: ServeTTSRequest):
172
-
173
- idstr: str | None = req.reference_id
174
- if idstr is not None:
175
- ref_folder = Path("references") / idstr
176
- ref_folder.mkdir(parents=True, exist_ok=True)
177
- ref_audios = list_files(
178
- ref_folder, AUDIO_EXTENSIONS, recursive=True, sort=False
179
- )
180
- prompt_tokens = [
181
- encode_reference(
182
- decoder_model=decoder_model,
183
- reference_audio=audio_to_bytes(str(ref_audio)),
184
- enable_reference_audio=True,
185
- )
186
- for ref_audio in ref_audios
187
- ]
188
- prompt_texts = [
189
- read_ref_text(str(ref_audio.with_suffix(".lab")))
190
- for ref_audio in ref_audios
191
- ]
192
-
193
- else:
194
- # Parse reference audio aka prompt
195
- refs = req.references
196
- if refs is None:
197
- refs = []
198
- prompt_tokens = [
199
- encode_reference(
200
- decoder_model=decoder_model,
201
- reference_audio=ref.audio,
202
- enable_reference_audio=True,
203
- )
204
- for ref in refs
205
- ]
206
- prompt_texts = [ref.text for ref in refs]
207
-
208
- # LLAMA Inference
209
- request = dict(
210
- device=decoder_model.device,
211
- max_new_tokens=req.max_new_tokens,
212
- text=(
213
- req.text
214
- if not req.normalize
215
- else ChnNormedText(raw_text=req.text).normalize()
216
- ),
217
- top_p=req.top_p,
218
- repetition_penalty=req.repetition_penalty,
219
- temperature=req.temperature,
220
- compile=args.compile,
221
- iterative_prompt=req.chunk_length > 0,
222
- chunk_length=req.chunk_length,
223
- max_length=2048,
224
- prompt_tokens=prompt_tokens,
225
- prompt_text=prompt_texts,
226
- )
227
-
228
- response_queue = queue.Queue()
229
- llama_queue.put(
230
- GenerateRequest(
231
- request=request,
232
- response_queue=response_queue,
233
- )
234
- )
235
-
236
- if req.streaming:
237
- yield wav_chunk_header()
238
-
239
- segments = []
240
- while True:
241
- result: WrappedGenerateResponse = response_queue.get()
242
- if result.status == "error":
243
- raise result.response
244
- break
245
-
246
- result: GenerateResponse = result.response
247
- if result.action == "next":
248
- break
249
-
250
- with autocast_exclude_mps(
251
- device_type=decoder_model.device.type, dtype=args.precision
252
- ):
253
- fake_audios = decode_vq_tokens(
254
- decoder_model=decoder_model,
255
- codes=result.codes,
256
- )
257
-
258
- fake_audios = fake_audios.float().cpu().numpy()
259
-
260
- if req.streaming:
261
- yield (fake_audios * 32768).astype(np.int16).tobytes()
262
- else:
263
- segments.append(fake_audios)
264
-
265
- if req.streaming:
266
- return
267
-
268
- if len(segments) == 0:
269
- raise HTTPException(
270
- HTTPStatus.INTERNAL_SERVER_ERROR,
271
- content="No audio generated, please check the input text.",
272
- )
273
-
274
- fake_audios = np.concatenate(segments, axis=0)
275
- yield fake_audios
276
-
277
-
278
- async def inference_async(req: ServeTTSRequest):
279
- for chunk in inference(req):
280
- yield chunk
281
-
282
-
283
- async def buffer_to_async_generator(buffer):
284
- yield buffer
285
-
286
-
287
- # @routes.http.post("/v1/tts")
288
- # async def api_invoke_model(
289
- # req: Annotated[ServeTTSRequest, Body(exclusive=True)],
290
- # ):
291
- # """
292
- # Invoke model and generate audio
293
- # """
294
- #
295
- # if args.max_text_length > 0 and len(req.text) > args.max_text_length:
296
- # raise HTTPException(
297
- # HTTPStatus.BAD_REQUEST,
298
- # content=f"Text is too long, max length is {args.max_text_length}",
299
- # )
300
- #
301
- # if req.streaming and req.format != "wav":
302
- # raise HTTPException(
303
- # HTTPStatus.BAD_REQUEST,
304
- # content="Streaming only supports WAV format",
305
- # )
306
- #
307
- # if req.streaming:
308
- # return StreamResponse(
309
- # iterable=inference_async(req),
310
- # headers={
311
- # "Content-Disposition": f"attachment; filename=audio.{req.format}",
312
- # },
313
- # content_type=get_content_type(req.format),
314
- # )
315
- # else:
316
- # fake_audios = next(inference(req))
317
- # buffer = io.BytesIO()
318
- # sf.write(
319
- # buffer,
320
- # fake_audios,
321
- # decoder_model.spec_transform.sample_rate,
322
- # format=req.format,
323
- # )
324
- #
325
- # return StreamResponse(
326
- # iterable=buffer_to_async_generator(buffer.getvalue()),
327
- # headers={
328
- # "Content-Disposition": f"attachment; filename=audio.{req.format}",
329
- # },
330
- # content_type=get_content_type(req.format),
331
- # )
332
- #
333
- #
334
- # @routes.http.post("/v1/health")
335
- # async def api_health():
336
- # """
337
- # Health check
338
- # """
339
- #
340
- # return JSONResponse({"status": "ok"})
341
-
342
-
343
- def parse_args():
344
- parser = ArgumentParser()
345
- parser.add_argument(
346
- "--llama-checkpoint-path",
347
- type=str,
348
- default="checkpoints/fish-speech-1.4",
349
- )
350
- parser.add_argument(
351
- "--decoder-checkpoint-path",
352
- type=str,
353
- default="checkpoints/fish-speech-1.4/firefly-gan-vq-fsq-8x1024-21hz-generator.pth",
354
- )
355
- parser.add_argument("--decoder-config-name", type=str, default="firefly_gan_vq")
356
- parser.add_argument("--device", type=str, default="cuda")
357
- parser.add_argument("--half", action="store_true")
358
- parser.add_argument("--compile", action="store_true")
359
- parser.add_argument("--max-text-length", type=int, default=0)
360
- parser.add_argument("--listen", type=str, default="127.0.0.1:8080")
361
- parser.add_argument("--workers", type=int, default=1)
362
-
363
- return parser.parse_args()
364
-
365
-
366
- # Define Kui app
367
- # openapi = OpenAPI(
368
- # {
369
- # "title": "Fish Speech API",
370
- # },
371
- # ).routes
372
- #
373
- #
374
- # class MsgPackRequest(HttpRequest):
375
- # async def data(self) -> Annotated[Any, ContentType("application/msgpack")]:
376
- # if self.content_type == "application/msgpack":
377
- # return ormsgpack.unpackb(await self.body)
378
- #
379
- # raise HTTPException(
380
- # HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
381
- # headers={"Accept": "application/msgpack"},
382
- # )
383
- #
384
- #
385
- # app = Kui(
386
- # routes=routes + openapi[1:], # Remove the default route
387
- # exception_handlers={
388
- # HTTPException: http_execption_handler,
389
- # Exception: other_exception_handler,
390
- # },
391
- # factory_class=FactoryClass(http=MsgPackRequest),
392
- # cors_config={},
393
- # )
394
-
395
-
396
- if __name__ == "__main__":
397
-
398
- import uvicorn
399
-
400
- args = parse_args()
401
- args.precision = torch.half if args.half else torch.bfloat16
402
-
403
- logger.info("Loading Llama model...")
404
- llama_queue = launch_thread_safe_queue(
405
- checkpoint_path=args.llama_checkpoint_path,
406
- device=args.device,
407
- precision=args.precision,
408
- compile=args.compile,
409
- )
410
- logger.info("Llama model loaded, loading VQ-GAN model...")
411
-
412
- decoder_model = load_decoder_model(
413
- config_name=args.decoder_config_name,
414
- checkpoint_path=args.decoder_checkpoint_path,
415
- device=args.device,
416
- )
417
-
418
- logger.info("VQ-GAN model loaded, warming up...")
419
-
420
- # Dry run to check if the model is loaded correctly and avoid the first-time latency
421
- list(
422
- inference(
423
- ServeTTSRequest(
424
- text="Hello world.",
425
- references=[],
426
- reference_id=None,
427
- max_new_tokens=1024,
428
- chunk_length=200,
429
- top_p=0.7,
430
- repetition_penalty=1.2,
431
- temperature=0.7,
432
- emotion=None,
433
- format="wav",
434
- )
435
- )
436
- )
437
-
438
- logger.info(f"Warming up done, starting server at http://{args.listen}")
439
- host, port = args.listen.split(":")
440
- uvicorn.run(app, host=host, port=int(port), workers=args.workers, log_level="info")
@@ -1,35 +0,0 @@
1
- from typing import Annotated, Literal, Optional
2
-
3
- from pydantic import BaseModel, Field, conint
4
-
5
-
6
- class ServeReferenceAudio(BaseModel):
7
- audio: bytes
8
- text: str
9
-
10
-
11
- class ServeTTSRequest(BaseModel):
12
- text: str
13
- chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
14
- # Audio format
15
- format: Literal["wav", "pcm", "mp3"] = "wav"
16
- mp3_bitrate: Literal[64, 128, 192] = 128
17
- # References audios for in-context learning
18
- references: list[ServeReferenceAudio] = []
19
- # Reference id
20
- # For example, if you want use https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
21
- # Just pass 7f92f8afb8ec43bf81429cc1c9199cb1
22
- reference_id: str | None = None
23
- # Normalize text for en & zh, this increase stability for numbers
24
- normalize: bool = True
25
- mp3_bitrate: Optional[int] = 64
26
- opus_bitrate: Optional[int] = -1000
27
- # Balance mode will reduce latency to 300ms, but may decrease stability
28
- latency: Literal["normal", "balanced"] = "normal"
29
- # not usually used below
30
- streaming: bool = False
31
- emotion: Optional[str] = None
32
- max_new_tokens: int = 1024
33
- top_p: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
34
- repetition_penalty: Annotated[float, Field(ge=0.9, le=2.0, strict=True)] = 1.2
35
- temperature: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
File without changes
@@ -1,34 +0,0 @@
1
- import httpx
2
- import ormsgpack
3
-
4
- from tools.commons import ServeReferenceAudio, ServeTTSRequest
5
-
6
- # priority: ref_id > references
7
- request = ServeTTSRequest(
8
- text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
9
- # reference_id="114514",
10
- references=[
11
- ServeReferenceAudio(
12
- audio=open("lengyue.wav", "rb").read(),
13
- text=open("lengyue.lab", "r", encoding="utf-8").read(),
14
- )
15
- ],
16
- streaming=True,
17
- )
18
-
19
- with (
20
- httpx.Client() as client,
21
- open("hello.wav", "wb") as f,
22
- ):
23
- with client.stream(
24
- "POST",
25
- "http://127.0.0.1:8080/v1/tts",
26
- content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
27
- headers={
28
- "authorization": "Bearer YOUR_API_KEY",
29
- "content-type": "application/msgpack",
30
- },
31
- timeout=None,
32
- ) as response:
33
- for chunk in response.iter_bytes():
34
- f.write(chunk)
File without changes