xinference 1.0.1__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +2 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +77 -71
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +79 -19
- xinference/core/supervisor.py +172 -10
- xinference/core/utils.py +12 -8
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/core.py +16 -0
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +36 -111
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +99 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/embedding/core.py +203 -142
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +8 -0
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +45 -13
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/llm_family.json +536 -53
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +454 -20
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +248 -52
- xinference/model/llm/sglang/core.py +1 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +36 -4
- xinference/model/llm/vllm/core.py +53 -10
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +94 -83
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +63 -20
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +1 -26
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +7 -13
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +2 -2
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +117 -89
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +11 -28
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +15 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/METADATA +68 -32
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/RECORD +316 -122
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/tools/api.py +0 -943
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -95
- xinference/thirdparty/fish_speech/tools/webui.py +0 -548
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/tools → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/WHEEL +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.0.1.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -208,12 +208,14 @@ class EmbeddingModel:
|
|
|
208
208
|
]
|
|
209
209
|
raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
|
|
210
210
|
|
|
211
|
-
|
|
211
|
+
if torch_dtype and torch_dtype == torch.float16:
|
|
212
|
+
model_kwargs = {"use_fp16": True}
|
|
213
|
+
else:
|
|
214
|
+
model_kwargs = {}
|
|
212
215
|
self._model = BGEM3FlagModel(
|
|
213
216
|
self._model_path,
|
|
214
217
|
device=self._device,
|
|
215
|
-
model_kwargs
|
|
216
|
-
trust_remote_code=True,
|
|
218
|
+
**model_kwargs,
|
|
217
219
|
)
|
|
218
220
|
else:
|
|
219
221
|
model_kwargs = {"torch_dtype": torch_dtype} if torch_dtype else None
|
|
@@ -224,7 +226,9 @@ class EmbeddingModel:
|
|
|
224
226
|
trust_remote_code=True,
|
|
225
227
|
)
|
|
226
228
|
|
|
227
|
-
def _fix_langchain_openai_inputs(
|
|
229
|
+
def _fix_langchain_openai_inputs(
|
|
230
|
+
self, sentences: Union[str, List[str], Dict[str, str], List[Dict[str, str]]]
|
|
231
|
+
):
|
|
228
232
|
# Check if sentences is a two-dimensional list of integers
|
|
229
233
|
if (
|
|
230
234
|
isinstance(sentences, list)
|
|
@@ -258,157 +262,172 @@ class EmbeddingModel:
|
|
|
258
262
|
sentences = lines_decoded
|
|
259
263
|
return sentences
|
|
260
264
|
|
|
261
|
-
def create_embedding(
|
|
265
|
+
def create_embedding(
|
|
266
|
+
self,
|
|
267
|
+
sentences: Union[str, List[str]],
|
|
268
|
+
**kwargs,
|
|
269
|
+
):
|
|
262
270
|
sentences = self._fix_langchain_openai_inputs(sentences)
|
|
263
271
|
|
|
264
|
-
from FlagEmbedding import BGEM3FlagModel
|
|
265
272
|
from sentence_transformers import SentenceTransformer
|
|
266
273
|
|
|
267
274
|
kwargs.setdefault("normalize_embeddings", True)
|
|
268
275
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
276
|
+
try:
|
|
277
|
+
from FlagEmbedding import BGEM3FlagModel
|
|
278
|
+
|
|
279
|
+
@no_type_check
|
|
280
|
+
def _encode_bgem3(
|
|
281
|
+
model: Union[SentenceTransformer, BGEM3FlagModel],
|
|
282
|
+
sentences: Union[str, List[str]],
|
|
283
|
+
batch_size: int = 32,
|
|
284
|
+
show_progress_bar: bool = None,
|
|
285
|
+
output_value: str = "sparse_embedding",
|
|
286
|
+
convert_to_numpy: bool = True,
|
|
287
|
+
convert_to_tensor: bool = False,
|
|
288
|
+
device: str = None,
|
|
289
|
+
normalize_embeddings: bool = False,
|
|
290
|
+
**kwargs,
|
|
291
|
+
):
|
|
292
|
+
"""
|
|
293
|
+
Computes sentence embeddings with bge-m3 model
|
|
294
|
+
Nothing special here, just replace sentence-transformer with FlagEmbedding
|
|
295
|
+
TODO: think about how to solve the redundant code of encode method in the future
|
|
296
|
+
|
|
297
|
+
:param sentences: the sentences to embed
|
|
298
|
+
:param batch_size: the batch size used for the computation
|
|
299
|
+
:param show_progress_bar: Output a progress bar when encode sentences
|
|
300
|
+
:param output_value: Default sentence_embedding, to get sentence embeddings. Can be set to token_embeddings to get wordpiece token embeddings. Set to None, to get all output values
|
|
301
|
+
:param convert_to_numpy: If true, the output is a list of numpy vectors. Else, it is a list of pytorch tensors.
|
|
302
|
+
:param convert_to_tensor: If true, you get one large tensor as return. Overwrites any setting from convert_to_numpy
|
|
303
|
+
:param device: Which torch.device to use for the computation
|
|
304
|
+
:param normalize_embeddings: If set to true, returned vectors will have length 1. In that case, the faster dot-product (util.dot_score) instead of cosine similarity can be used.
|
|
305
|
+
|
|
306
|
+
:return:
|
|
307
|
+
By default, a list of tensors is returned. If convert_to_tensor, a stacked tensor is returned. If convert_to_numpy, a numpy matrix is returned.
|
|
308
|
+
"""
|
|
309
|
+
import torch
|
|
310
|
+
from tqdm.autonotebook import trange
|
|
311
|
+
|
|
312
|
+
if show_progress_bar is None:
|
|
313
|
+
show_progress_bar = (
|
|
314
|
+
logger.getEffectiveLevel() == logging.INFO
|
|
315
|
+
or logger.getEffectiveLevel() == logging.DEBUG
|
|
316
|
+
)
|
|
295
317
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
318
|
+
if convert_to_tensor:
|
|
319
|
+
convert_to_numpy = False
|
|
320
|
+
|
|
321
|
+
if output_value != "sparse_embedding":
|
|
322
|
+
convert_to_tensor = False
|
|
323
|
+
convert_to_numpy = False
|
|
324
|
+
|
|
325
|
+
input_was_string = False
|
|
326
|
+
if isinstance(sentences, str) or not hasattr(
|
|
327
|
+
sentences, "__len__"
|
|
328
|
+
): # Cast an individual sentence to a list with length 1
|
|
329
|
+
sentences = [sentences]
|
|
330
|
+
input_was_string = True
|
|
331
|
+
|
|
332
|
+
if device is None:
|
|
333
|
+
# Same as SentenceTransformer.py
|
|
334
|
+
from sentence_transformers.util import get_device_name
|
|
335
|
+
|
|
336
|
+
device = get_device_name()
|
|
337
|
+
logger.info(f"Use pytorch device_name: {device}")
|
|
338
|
+
|
|
339
|
+
all_embeddings = []
|
|
340
|
+
all_token_nums = 0
|
|
341
|
+
|
|
342
|
+
# The original code does not support other inference engines
|
|
343
|
+
def _text_length(text):
|
|
344
|
+
if isinstance(text, dict): # {key: value} case
|
|
345
|
+
return len(next(iter(text.values())))
|
|
346
|
+
elif not hasattr(text, "__len__"): # Object has no len() method
|
|
347
|
+
return 1
|
|
348
|
+
elif len(text) == 0 or isinstance(
|
|
349
|
+
text[0], int
|
|
350
|
+
): # Empty string or list of ints
|
|
351
|
+
return len(text)
|
|
352
|
+
else:
|
|
353
|
+
return sum(
|
|
354
|
+
[len(t) for t in text]
|
|
355
|
+
) # Sum of length of individual strings
|
|
301
356
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
logger.getEffectiveLevel() == logging.INFO
|
|
305
|
-
or logger.getEffectiveLevel() == logging.DEBUG
|
|
357
|
+
length_sorted_idx = np.argsort(
|
|
358
|
+
[-_text_length(sen) for sen in sentences]
|
|
306
359
|
)
|
|
360
|
+
sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
|
|
361
|
+
|
|
362
|
+
for start_index in trange(
|
|
363
|
+
0,
|
|
364
|
+
len(sentences),
|
|
365
|
+
batch_size,
|
|
366
|
+
desc="Batches",
|
|
367
|
+
disable=not show_progress_bar,
|
|
368
|
+
):
|
|
369
|
+
sentences_batch = sentences_sorted[
|
|
370
|
+
start_index : start_index + batch_size
|
|
371
|
+
]
|
|
372
|
+
|
|
373
|
+
with torch.no_grad():
|
|
374
|
+
out_features = model.encode(sentences_batch, **kwargs)
|
|
375
|
+
|
|
376
|
+
if output_value == "token_embeddings":
|
|
377
|
+
embeddings = []
|
|
378
|
+
for token_emb, attention in zip(
|
|
379
|
+
out_features[output_value],
|
|
380
|
+
out_features["attention_mask"],
|
|
381
|
+
):
|
|
382
|
+
last_mask_id = len(attention) - 1
|
|
383
|
+
while (
|
|
384
|
+
last_mask_id > 0
|
|
385
|
+
and attention[last_mask_id].item() == 0
|
|
386
|
+
):
|
|
387
|
+
last_mask_id -= 1
|
|
388
|
+
|
|
389
|
+
embeddings.append(token_emb[0 : last_mask_id + 1])
|
|
390
|
+
elif output_value is None: # Return all outputs
|
|
391
|
+
embeddings = []
|
|
392
|
+
for sent_idx in range(
|
|
393
|
+
len(out_features["sentence_embedding"])
|
|
394
|
+
):
|
|
395
|
+
row = {
|
|
396
|
+
name: out_features[name][sent_idx]
|
|
397
|
+
for name in out_features
|
|
398
|
+
}
|
|
399
|
+
embeddings.append(row)
|
|
400
|
+
# for sparse embedding
|
|
401
|
+
else:
|
|
402
|
+
if kwargs.get("return_sparse"):
|
|
403
|
+
embeddings = out_features["lexical_weights"]
|
|
404
|
+
else:
|
|
405
|
+
embeddings = out_features["dense_vecs"]
|
|
307
406
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
if output_value != "sparse_embedding":
|
|
312
|
-
convert_to_tensor = False
|
|
313
|
-
convert_to_numpy = False
|
|
314
|
-
|
|
315
|
-
input_was_string = False
|
|
316
|
-
if isinstance(sentences, str) or not hasattr(
|
|
317
|
-
sentences, "__len__"
|
|
318
|
-
): # Cast an individual sentence to a list with length 1
|
|
319
|
-
sentences = [sentences]
|
|
320
|
-
input_was_string = True
|
|
321
|
-
|
|
322
|
-
if device is None:
|
|
323
|
-
# Same as SentenceTransformer.py
|
|
324
|
-
from sentence_transformers.util import get_device_name
|
|
325
|
-
|
|
326
|
-
device = get_device_name()
|
|
327
|
-
logger.info(f"Use pytorch device_name: {device}")
|
|
328
|
-
|
|
329
|
-
all_embeddings = []
|
|
330
|
-
all_token_nums = 0
|
|
331
|
-
|
|
332
|
-
# The original code does not support other inference engines
|
|
333
|
-
def _text_length(text):
|
|
334
|
-
if isinstance(text, dict): # {key: value} case
|
|
335
|
-
return len(next(iter(text.values())))
|
|
336
|
-
elif not hasattr(text, "__len__"): # Object has no len() method
|
|
337
|
-
return 1
|
|
338
|
-
elif len(text) == 0 or isinstance(
|
|
339
|
-
text[0], int
|
|
340
|
-
): # Empty string or list of ints
|
|
341
|
-
return len(text)
|
|
342
|
-
else:
|
|
343
|
-
return sum(
|
|
344
|
-
[len(t) for t in text]
|
|
345
|
-
) # Sum of length of individual strings
|
|
407
|
+
if convert_to_numpy:
|
|
408
|
+
embeddings = embeddings.cpu()
|
|
346
409
|
|
|
347
|
-
|
|
348
|
-
sentences_sorted = [sentences[idx] for idx in length_sorted_idx]
|
|
410
|
+
all_embeddings.extend(embeddings)
|
|
349
411
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
len(sentences),
|
|
353
|
-
batch_size,
|
|
354
|
-
desc="Batches",
|
|
355
|
-
disable=not show_progress_bar,
|
|
356
|
-
):
|
|
357
|
-
sentences_batch = sentences_sorted[
|
|
358
|
-
start_index : start_index + batch_size
|
|
412
|
+
all_embeddings = [
|
|
413
|
+
all_embeddings[idx] for idx in np.argsort(length_sorted_idx)
|
|
359
414
|
]
|
|
360
415
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
if output_value == "token_embeddings":
|
|
365
|
-
embeddings = []
|
|
366
|
-
for token_emb, attention in zip(
|
|
367
|
-
out_features[output_value], out_features["attention_mask"]
|
|
368
|
-
):
|
|
369
|
-
last_mask_id = len(attention) - 1
|
|
370
|
-
while (
|
|
371
|
-
last_mask_id > 0 and attention[last_mask_id].item() == 0
|
|
372
|
-
):
|
|
373
|
-
last_mask_id -= 1
|
|
374
|
-
|
|
375
|
-
embeddings.append(token_emb[0 : last_mask_id + 1])
|
|
376
|
-
elif output_value is None: # Return all outputs
|
|
377
|
-
embeddings = []
|
|
378
|
-
for sent_idx in range(len(out_features["sentence_embedding"])):
|
|
379
|
-
row = {
|
|
380
|
-
name: out_features[name][sent_idx]
|
|
381
|
-
for name in out_features
|
|
382
|
-
}
|
|
383
|
-
embeddings.append(row)
|
|
384
|
-
# for sparse embedding
|
|
416
|
+
if convert_to_tensor:
|
|
417
|
+
if len(all_embeddings):
|
|
418
|
+
all_embeddings = torch.stack(all_embeddings)
|
|
385
419
|
else:
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
embeddings = out_features["dense_vecs"]
|
|
390
|
-
|
|
391
|
-
if convert_to_numpy:
|
|
392
|
-
embeddings = embeddings.cpu()
|
|
393
|
-
|
|
394
|
-
all_embeddings.extend(embeddings)
|
|
395
|
-
|
|
396
|
-
all_embeddings = [
|
|
397
|
-
all_embeddings[idx] for idx in np.argsort(length_sorted_idx)
|
|
398
|
-
]
|
|
420
|
+
all_embeddings = torch.Tensor()
|
|
421
|
+
elif convert_to_numpy:
|
|
422
|
+
all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
|
|
399
423
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
all_embeddings = torch.stack(all_embeddings)
|
|
403
|
-
else:
|
|
404
|
-
all_embeddings = torch.Tensor()
|
|
405
|
-
elif convert_to_numpy:
|
|
406
|
-
all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
|
|
424
|
+
if input_was_string:
|
|
425
|
+
all_embeddings = all_embeddings[0]
|
|
407
426
|
|
|
408
|
-
|
|
409
|
-
all_embeddings = all_embeddings[0]
|
|
427
|
+
return all_embeddings, all_token_nums
|
|
410
428
|
|
|
411
|
-
|
|
429
|
+
except ImportError:
|
|
430
|
+
_encode_bgem3 = None
|
|
412
431
|
|
|
413
432
|
# Copied from sentence-transformers and modified to also return the number of tokens
|
|
414
433
|
@no_type_check
|
|
@@ -526,7 +545,11 @@ class EmbeddingModel:
|
|
|
526
545
|
features.update(extra_features)
|
|
527
546
|
# when batching, the attention mask 1 means there is a token
|
|
528
547
|
# thus we just sum up it to get the total number of tokens
|
|
529
|
-
|
|
548
|
+
if "clip" in self._model_spec.model_name.lower():
|
|
549
|
+
all_token_nums += features["input_ids"].numel()
|
|
550
|
+
all_token_nums += features["pixel_values"].numel()
|
|
551
|
+
else:
|
|
552
|
+
all_token_nums += features["attention_mask"].sum().item()
|
|
530
553
|
|
|
531
554
|
with torch.no_grad():
|
|
532
555
|
out_features = model.forward(features, **kwargs)
|
|
@@ -582,6 +605,10 @@ class EmbeddingModel:
|
|
|
582
605
|
|
|
583
606
|
return all_embeddings, all_token_nums
|
|
584
607
|
|
|
608
|
+
is_bge_m3_flag_model = (
|
|
609
|
+
self._kwargs.get("hybrid_mode")
|
|
610
|
+
and "m3" in self._model_spec.model_name.lower()
|
|
611
|
+
)
|
|
585
612
|
if (
|
|
586
613
|
"gte" in self._model_spec.model_name.lower()
|
|
587
614
|
and "qwen2" in self._model_spec.model_name.lower()
|
|
@@ -593,10 +620,45 @@ class EmbeddingModel:
|
|
|
593
620
|
convert_to_numpy=False,
|
|
594
621
|
**kwargs,
|
|
595
622
|
)
|
|
596
|
-
elif
|
|
623
|
+
elif is_bge_m3_flag_model:
|
|
624
|
+
assert _encode_bgem3 is not None
|
|
597
625
|
all_embeddings, all_token_nums = _encode_bgem3(
|
|
598
626
|
self._model, sentences, convert_to_numpy=False, **kwargs
|
|
599
627
|
)
|
|
628
|
+
elif "clip" in self._model_spec.model_name.lower():
|
|
629
|
+
import base64
|
|
630
|
+
import re
|
|
631
|
+
from io import BytesIO
|
|
632
|
+
|
|
633
|
+
from PIL import Image
|
|
634
|
+
|
|
635
|
+
def base64_to_image(base64_str: str) -> Image.Image:
|
|
636
|
+
# base64_data = re.sub("^data:image/.+;base64,", "", base64_str)
|
|
637
|
+
base64_data = base64_str.split(",", 1)[1]
|
|
638
|
+
byte_data = base64.b64decode(base64_data)
|
|
639
|
+
image_data = BytesIO(byte_data)
|
|
640
|
+
img = Image.open(image_data)
|
|
641
|
+
return img
|
|
642
|
+
|
|
643
|
+
objs: list[dict[str, str]] = []
|
|
644
|
+
for item in sentences:
|
|
645
|
+
if isinstance(item, dict):
|
|
646
|
+
if item.get("text") is not None:
|
|
647
|
+
objs.append(item["text"])
|
|
648
|
+
elif item.get("image") is not None:
|
|
649
|
+
if re.match(r"^data:image/.+;base64,", item["image"]):
|
|
650
|
+
image = base64_to_image(item["image"])
|
|
651
|
+
objs.append(image)
|
|
652
|
+
else:
|
|
653
|
+
objs.append(item["image"])
|
|
654
|
+
else:
|
|
655
|
+
logger.error("Please check the input data.")
|
|
656
|
+
all_embeddings, all_token_nums = encode(
|
|
657
|
+
self._model,
|
|
658
|
+
objs,
|
|
659
|
+
convert_to_numpy=False,
|
|
660
|
+
**self._kwargs,
|
|
661
|
+
)
|
|
600
662
|
else:
|
|
601
663
|
all_embeddings, all_token_nums = encode(
|
|
602
664
|
self._model,
|
|
@@ -608,7 +670,7 @@ class EmbeddingModel:
|
|
|
608
670
|
all_embeddings = [all_embeddings]
|
|
609
671
|
embedding_list = []
|
|
610
672
|
for index, data in enumerate(all_embeddings):
|
|
611
|
-
if kwargs.get("return_sparse") and
|
|
673
|
+
if kwargs.get("return_sparse") and is_bge_m3_flag_model:
|
|
612
674
|
embedding_list.append(
|
|
613
675
|
EmbeddingData(
|
|
614
676
|
index=index,
|
|
@@ -628,8 +690,7 @@ class EmbeddingModel:
|
|
|
628
690
|
result = Embedding(
|
|
629
691
|
object=(
|
|
630
692
|
"list" # type: ignore
|
|
631
|
-
if not
|
|
632
|
-
and not kwargs.get("return_sparse")
|
|
693
|
+
if not is_bge_m3_flag_model and not kwargs.get("return_sparse")
|
|
633
694
|
else "dict"
|
|
634
695
|
),
|
|
635
696
|
model=self._model_uid,
|
|
@@ -245,5 +245,12 @@
|
|
|
245
245
|
"max_tokens": 8192,
|
|
246
246
|
"language": ["zh", "en"],
|
|
247
247
|
"model_id": "jinaai/jina-embeddings-v3"
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
"model_name": "jina-clip-v2",
|
|
251
|
+
"dimensions": 1024,
|
|
252
|
+
"max_tokens": 8192,
|
|
253
|
+
"language": ["89 languages supported"],
|
|
254
|
+
"model_id": "jinaai/jina-clip-v2"
|
|
248
255
|
}
|
|
249
256
|
]
|
|
@@ -248,5 +248,13 @@
|
|
|
248
248
|
"language": ["zh", "en"],
|
|
249
249
|
"model_id": "jinaai/jina-embeddings-v3",
|
|
250
250
|
"model_hub": "modelscope"
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
"model_name": "jina-clip-v2",
|
|
254
|
+
"dimensions": 1024,
|
|
255
|
+
"max_tokens": 8192,
|
|
256
|
+
"language": ["89 languages supported"],
|
|
257
|
+
"model_id": "jinaai/jina-clip-v2",
|
|
258
|
+
"model_hub": "modelscope"
|
|
251
259
|
}
|
|
252
260
|
]
|
xinference/model/image/core.py
CHANGED
|
@@ -22,7 +22,12 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
|
|
|
22
22
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
23
23
|
from ...types import PeftModelConfig
|
|
24
24
|
from ..core import CacheableModelSpec, ModelDescription
|
|
25
|
-
from ..utils import
|
|
25
|
+
from ..utils import (
|
|
26
|
+
IS_NEW_HUGGINGFACE_HUB,
|
|
27
|
+
retry_download,
|
|
28
|
+
symlink_local_file,
|
|
29
|
+
valid_model_revision,
|
|
30
|
+
)
|
|
26
31
|
from .ocr.got_ocr2 import GotOCR2Model
|
|
27
32
|
from .stable_diffusion.core import DiffusionModel
|
|
28
33
|
from .stable_diffusion.mlx import MLXDiffusionModel
|
|
@@ -51,6 +56,9 @@ class ImageModelFamilyV1(CacheableModelSpec):
|
|
|
51
56
|
controlnet: Optional[List["ImageModelFamilyV1"]]
|
|
52
57
|
default_model_config: Optional[dict] = {}
|
|
53
58
|
default_generate_config: Optional[dict] = {}
|
|
59
|
+
gguf_model_id: Optional[str]
|
|
60
|
+
gguf_quantizations: Optional[List[str]]
|
|
61
|
+
gguf_model_file_name_template: Optional[str]
|
|
54
62
|
|
|
55
63
|
|
|
56
64
|
class ImageModelDescription(ModelDescription):
|
|
@@ -187,6 +195,61 @@ def get_cache_status(
|
|
|
187
195
|
return valid_model_revision(meta_path, model_spec.model_revision)
|
|
188
196
|
|
|
189
197
|
|
|
198
|
+
def cache_gguf(spec: ImageModelFamilyV1, quantization: Optional[str] = None):
    """
    Download the GGUF file of an image model for one quantization into the cache.

    All argument validation happens before anything is written to disk, so a
    rejected request does not leave an empty cache directory behind.

    :param spec: image model family; this function reads ``model_name``,
        ``model_hub``, ``model_revision``, ``gguf_model_id``,
        ``gguf_quantizations`` and ``gguf_model_file_name_template`` from it.
    :param quantization: name of the requested GGUF quantization. When it is
        empty or ``None`` nothing is downloaded.
    :return: ``None`` when no quantization was requested, otherwise the path
        ``<XINFERENCE_CACHE_DIR>/<model_name>/<file name>`` of the GGUF file.
    :raises NotImplementedError: if the spec has no GGUF file name template, or
        if ``spec.model_hub`` is neither ``"huggingface"`` nor ``"modelscope"``.
    :raises ValueError: if ``quantization`` is not listed in
        ``spec.gguf_quantizations``.
    """
    if not quantization:
        # No quantization requested, so there is nothing to download.
        return

    # Validate first; nothing below this section runs for a bad request.
    if not spec.gguf_model_file_name_template:
        raise NotImplementedError(
            f"{spec.model_name} does not support GGUF quantization"
        )
    if quantization not in (spec.gguf_quantizations or []):
        raise ValueError(
            f"Cannot support quantization {quantization}, "
            f"available quantizations: {spec.gguf_quantizations}"
        )
    if spec.model_hub not in ("huggingface", "modelscope"):
        raise NotImplementedError(
            f"Cannot download GGUF files from model hub {spec.model_hub}"
        )

    cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, spec.model_name))
    # exist_ok=True already covers the "directory is present" case.
    os.makedirs(cache_dir, exist_ok=True)

    filename = spec.gguf_model_file_name_template.format(quantization=quantization)  # type: ignore
    full_path = os.path.join(cache_dir, filename)

    if spec.model_hub == "huggingface":
        import huggingface_hub

        # Older huggingface_hub releases are asked to place a symlink in
        # cache_dir themselves; for newer releases the link is created
        # explicitly after the download.
        use_symlinks = {}
        if not IS_NEW_HUGGINGFACE_HUB:
            use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
        # NOTE(review): retry_download presumably forwards the arguments after
        # the first three to the download function -- confirm against ..utils.
        download_file_path = retry_download(
            huggingface_hub.hf_hub_download,
            spec.model_name,
            None,
            spec.gguf_model_id,
            filename=filename,
            **use_symlinks,
        )
        if IS_NEW_HUGGINGFACE_HUB:
            symlink_local_file(download_file_path, cache_dir, filename)
    else:
        # Only "modelscope" can reach this branch (hub validated above).
        from modelscope.hub.file_download import model_file_download

        download_file_path = retry_download(
            model_file_download,
            spec.model_name,
            None,
            spec.gguf_model_id,
            filename,
            revision=spec.model_revision,
        )
        symlink_local_file(download_file_path, cache_dir, filename)

    return full_path
|
|
251
|
+
|
|
252
|
+
|
|
190
253
|
def create_ocr_model_instance(
|
|
191
254
|
subpool_addr: str,
|
|
192
255
|
devices: List[str],
|
|
@@ -219,6 +282,8 @@ def create_image_model_instance(
|
|
|
219
282
|
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
|
|
220
283
|
] = None,
|
|
221
284
|
model_path: Optional[str] = None,
|
|
285
|
+
gguf_quantization: Optional[str] = None,
|
|
286
|
+
gguf_model_path: Optional[str] = None,
|
|
222
287
|
**kwargs,
|
|
223
288
|
) -> Tuple[
|
|
224
289
|
Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
|
|
@@ -272,6 +337,8 @@ def create_image_model_instance(
|
|
|
272
337
|
]
|
|
273
338
|
if not model_path:
|
|
274
339
|
model_path = cache(model_spec)
|
|
340
|
+
if not gguf_model_path and gguf_quantization:
|
|
341
|
+
gguf_model_path = cache_gguf(model_spec, gguf_quantization)
|
|
275
342
|
if peft_model_config is not None:
|
|
276
343
|
lora_model = peft_model_config.peft_model
|
|
277
344
|
lora_load_kwargs = peft_model_config.image_lora_load_kwargs
|
|
@@ -298,6 +365,7 @@ def create_image_model_instance(
|
|
|
298
365
|
lora_load_kwargs=lora_load_kwargs,
|
|
299
366
|
lora_fuse_kwargs=lora_fuse_kwargs,
|
|
300
367
|
model_spec=model_spec,
|
|
368
|
+
gguf_model_path=gguf_model_path,
|
|
301
369
|
**kwargs,
|
|
302
370
|
)
|
|
303
371
|
model_description = ImageModelDescription(
|