xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries; it is provided for informational purposes only.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0

@@ -0,0 +1,253 @@
+import importlib
+from typing import List
+
+import gruut
+from gruut_ipa import IPA  # pip install gruut_ipa
+
+from .base import BasePhonemizer
+from .punctuation import Punctuation
+
+# Table for str.translate to fix gruut/TTS phoneme mismatch
+GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")
+
+
+class Gruut(BasePhonemizer):
+    """Gruut wrapper for G2P
+
+    Args:
+        language (str):
+            Valid language code for the used backend.
+
+        punctuations (str):
+            Characters to be treated as punctuation. Defaults to `Punctuation.default_puncs()`.
+
+        keep_puncs (bool):
+            If true, keep the punctuations after phonemization. Defaults to True.
+
+        use_espeak_phonemes (bool):
+            If true, use espeak lexicons instead of default Gruut lexicons. Defaults to False.
+
+        keep_stress (bool):
+            If true, keep the stress characters after phonemization. Defaults to False.
+
+    Example:
+
+        >>> from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
+        >>> phonemizer = Gruut('en-us')
+        >>> phonemizer.phonemize("Be a voice, not an! echo?", separator="|")
+        'b|i| ə| v|ɔ|ɪ|s, n|ɑ|t| ə|n! ɛ|k|o|ʊ?'
+    """
+
+    def __init__(
+        self,
+        language: str,
+        punctuations=Punctuation.default_puncs(),
+        keep_puncs=True,
+        use_espeak_phonemes=False,
+        keep_stress=False,
+    ):
+        super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
+        self.use_espeak_phonemes = use_espeak_phonemes
+        self.keep_stress = keep_stress
+
+    @staticmethod
+    def name():
+        return "gruut"
+
+    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:  # pylint: disable=unused-argument
+        """Convert input text to phonemes.
+
+        Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters
+        that constitude a single sound.
+
+        It doesn't affect 🐸TTS since it individually converts each character to token IDs.
+
+        Examples::
+            "hello how are you today?" -> `h|ɛ|l|o|ʊ| h|a|ʊ| ɑ|ɹ| j|u| t|ə|d|e|ɪ`
+
+        Args:
+            text (str):
+                Text to be converted to phonemes.
+
+            tie (bool, optional) : When True use a '͡' character between
+                consecutive characters of a single phoneme. Else separate phoneme
+                with '_'. This option requires espeak>=1.49. Default to False.
+        """
+        ph_list = []
+        for sentence in gruut.sentences(text, lang=self.language, espeak=self.use_espeak_phonemes):
+            for word in sentence:
+                if word.is_break:
+                    # Use actual character for break phoneme (e.g., comma)
+                    if ph_list:
+                        # Join with previous word
+                        ph_list[-1].append(word.text)
+                    else:
+                        # First word is punctuation
+                        ph_list.append([word.text])
+                elif word.phonemes:
+                    # Add phonemes for word
+                    word_phonemes = []
+
+                    for word_phoneme in word.phonemes:
+                        if not self.keep_stress:
+                            # Remove primary/secondary stress
+                            word_phoneme = IPA.without_stress(word_phoneme)
+
+                        word_phoneme = word_phoneme.translate(GRUUT_TRANS_TABLE)
+
+                        if word_phoneme:
+                            # Flatten phonemes
+                            word_phonemes.extend(word_phoneme)
+
+                    if word_phonemes:
+                        ph_list.append(word_phonemes)
+
+        ph_words = [separator.join(word_phonemes) for word_phonemes in ph_list]
+        ph = f"{separator} ".join(ph_words)
+        return ph
+
+    def _phonemize(self, text, separator):
+        return self.phonemize_gruut(text, separator, tie=False)
+
+    def is_supported_language(self, language):
+        """Returns True if `language` is supported by the backend"""
+        return gruut.is_language_supported(language)
+
+    @staticmethod
+    def supported_languages() -> List:
+        """Get a dictionary of supported languages.
+
+        Returns:
+            List: List of language codes.
+        """
+        return list(gruut.get_supported_languages())
+
+    def version(self):
+        """Get the version of the used backend.
+
+        Returns:
+            str: Version of the used backend.
+        """
+        return gruut.__version__
+
+    @classmethod
+    def is_available(cls):
+        """Return true if ESpeak is available else false"""
+        return importlib.util.find_spec("gruut") is not None
+
+
+if __name__ == "__main__":
+    from es_to_ipa import es2ipa
+    import json
+
+    e = Gruut(language="es-es", keep_puncs=True, keep_stress=True, use_espeak_phonemes=True)
+    symbols = [
+        "_",
+        ",",
+        ".",
+        "!",
+        "?",
+        "-",
+        "~",
+        "\u2026",
+        "N",
+        "Q",
+        "a",
+        "b",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "j",
+        "k",
+        "l",
+        "m",
+        "n",
+        "o",
+        "p",
+        "s",
+        "t",
+        "u",
+        "v",
+        "w",
+        "x",
+        "y",
+        "z",
+        "\u0251",
+        "\u00e6",
+        "\u0283",
+        "\u0291",
+        "\u00e7",
+        "\u026f",
+        "\u026a",
+        "\u0254",
+        "\u025b",
+        "\u0279",
+        "\u00f0",
+        "\u0259",
+        "\u026b",
+        "\u0265",
+        "\u0278",
+        "\u028a",
+        "\u027e",
+        "\u0292",
+        "\u03b8",
+        "\u03b2",
+        "\u014b",
+        "\u0266",
+        "\u207c",
+        "\u02b0",
+        "`",
+        "^",
+        "#",
+        "*",
+        "=",
+        "\u02c8",
+        "\u02cc",
+        "\u2192",
+        "\u2193",
+        "\u2191",
+        " ",
+    ]
+    with open('./text/es_phonemizer/spanish_text.txt', 'r') as f:
+        lines = f.readlines()
+
+
+    used_sym = []
+    not_existed_sym = []
+    phonemes = []
+
+    for line in lines[:400]:
+        text = line.split('|')[-1].strip()
+        ipa = es2ipa(text)
+        phonemes.append(ipa + '\n')
+        for s in ipa:
+            if s not in symbols:
+                if s not in not_existed_sym:
+                    print(f'not_existed char: {s}')
+                    not_existed_sym.append(s)
+            else:
+                if s not in used_sym:
+                    # print(f'used char: {s}')
+                    used_sym.append(s)
+
+    print(used_sym)
+    print(not_existed_sym)
+
+
+    with open('./text/es_phonemizer/es_symbols.txt', 'w') as g:
+        g.writelines(symbols + not_existed_sym)
+
+    with open('./text/es_phonemizer/example_ipa.txt', 'w') as g:
+        g.writelines(phonemes)
+
+    data = {'symbols': symbols + not_existed_sym}
+    with open('./text/es_phonemizer/es_symbols_v2.json', 'w') as f:
+        json.dump(data, f, indent=4)
+
+
+
+
+
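
For orientation (an editorial note, not part of the packaged diff): the `Gruut` class added above is a thin wrapper around the `gruut` G2P library behind the `BasePhonemizer` interface that appears later in this diff. A minimal sketch of how it might be exercised, mirroring the class docstring and assuming the `gruut` and `gruut_ipa` packages are installed and the vendored module is importable under the path shown:

```python
# Editorial sketch, not part of the diff. The import path is assumed from the
# file listing above (xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py).
from xinference.thirdparty.melo.text.es_phonemizer.gruut_wrapper import Gruut

print(Gruut.is_available())   # True when the `gruut` package can be imported
phonemizer = Gruut("en-us")   # keep_puncs defaults to True in this wrapper
print(phonemizer.phonemize("Be a voice, not an! echo?", separator="|"))
# Expected per the docstring: 'b|i| ə| v|ɔ|ɪ|s, n|ɑ|t| ə|n! ɛ|k|o|ʊ?'
```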

@@ -0,0 +1,174 @@
+import collections
+import re
+from enum import Enum
+
+import six
+
+_DEF_PUNCS = ';:,.!?¡¿—…"«»“”'
+
+_PUNC_IDX = collections.namedtuple("_punc_index", ["punc", "position"])
+
+
+class PuncPosition(Enum):
+    """Enum for the punctuations positions"""
+
+    BEGIN = 0
+    END = 1
+    MIDDLE = 2
+    ALONE = 3
+
+
+class Punctuation:
+    """Handle punctuations in text.
+
+    Just strip punctuations from text or strip and restore them later.
+
+    Args:
+        puncs (str): The punctuations to be processed. Defaults to `_DEF_PUNCS`.
+
+    Example:
+        >>> punc = Punctuation()
+        >>> punc.strip("This is. example !")
+        'This is example'
+
+        >>> text_striped, punc_map = punc.strip_to_restore("This is. example !")
+        >>> ' '.join(text_striped)
+        'This is example'
+
+        >>> text_restored = punc.restore(text_striped, punc_map)
+        >>> text_restored[0]
+        'This is. example !'
+    """
+
+    def __init__(self, puncs: str = _DEF_PUNCS):
+        self.puncs = puncs
+
+    @staticmethod
+    def default_puncs():
+        """Return default set of punctuations."""
+        return _DEF_PUNCS
+
+    @property
+    def puncs(self):
+        return self._puncs
+
+    @puncs.setter
+    def puncs(self, value):
+        if not isinstance(value, six.string_types):
+            raise ValueError("[!] Punctuations must be of type str.")
+        self._puncs = "".join(list(dict.fromkeys(list(value))))  # remove duplicates without changing the oreder
+        self.puncs_regular_exp = re.compile(rf"(\s*[{re.escape(self._puncs)}]+\s*)+")
+
+    def strip(self, text):
+        """Remove all the punctuations by replacing with `space`.
+
+        Args:
+            text (str): The text to be processed.
+
+        Example::
+
+            "This is. example !" -> "This is example "
+        """
+        return re.sub(self.puncs_regular_exp, " ", text).rstrip().lstrip()
+
+    def strip_to_restore(self, text):
+        """Remove punctuations from text to restore them later.
+
+        Args:
+            text (str): The text to be processed.
+
+        Examples ::
+
+            "This is. example !" -> [["This is", "example"], [".", "!"]]
+
+        """
+        text, puncs = self._strip_to_restore(text)
+        return text, puncs
+
+    def _strip_to_restore(self, text):
+        """Auxiliary method for Punctuation.preserve()"""
+        matches = list(re.finditer(self.puncs_regular_exp, text))
+        if not matches:
+            return [text], []
+        # the text is only punctuations
+        if len(matches) == 1 and matches[0].group() == text:
+            return [], [_PUNC_IDX(text, PuncPosition.ALONE)]
+        # build a punctuation map to be used later to restore punctuations
+        puncs = []
+        for match in matches:
+            position = PuncPosition.MIDDLE
+            if match == matches[0] and text.startswith(match.group()):
+                position = PuncPosition.BEGIN
+            elif match == matches[-1] and text.endswith(match.group()):
+                position = PuncPosition.END
+            puncs.append(_PUNC_IDX(match.group(), position))
+        # convert str text to a List[str], each item is separated by a punctuation
+        splitted_text = []
+        for idx, punc in enumerate(puncs):
+            split = text.split(punc.punc)
+            prefix, suffix = split[0], punc.punc.join(split[1:])
+            splitted_text.append(prefix)
+            # if the text does not end with a punctuation, add it to the last item
+            if idx == len(puncs) - 1 and len(suffix) > 0:
+                splitted_text.append(suffix)
+            text = suffix
+        while splitted_text[0] == '':
+            splitted_text = splitted_text[1:]
+        return splitted_text, puncs
+
+    @classmethod
+    def restore(cls, text, puncs):
+        """Restore punctuation in a text.
+
+        Args:
+            text (str): The text to be processed.
+            puncs (List[str]): The list of punctuations map to be used for restoring.
+
+        Examples ::
+
+            ['This is', 'example'], ['.', '!'] -> "This is. example!"
+
+        """
+        return cls._restore(text, puncs, 0)
+
+    @classmethod
+    def _restore(cls, text, puncs, num):  # pylint: disable=too-many-return-statements
+        """Auxiliary method for Punctuation.restore()"""
+        if not puncs:
+            return text
+
+        # nothing have been phonemized, returns the puncs alone
+        if not text:
+            return ["".join(m.punc for m in puncs)]
+
+        current = puncs[0]
+
+        if current.position == PuncPosition.BEGIN:
+            return cls._restore([current.punc + text[0]] + text[1:], puncs[1:], num)
+
+        if current.position == PuncPosition.END:
+            return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)
+
+        if current.position == PuncPosition.ALONE:
+            return [current.mark] + cls._restore(text, puncs[1:], num + 1)
+
+        # POSITION == MIDDLE
+        if len(text) == 1:  # pragma: nocover
+            # a corner case where the final part of an intermediate
+            # mark (I) has not been phonemized
+            return cls._restore([text[0] + current.punc], puncs[1:], num)
+
+        return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
+
+
+# if __name__ == "__main__":
+#     punc = Punctuation()
+#     text = "This is. This is, example!"
+
+#     print(punc.strip(text))
+
+#     split_text, puncs = punc.strip_to_restore(text)
+#     print(split_text, " ---- ", puncs)
+
+#     restored_text = punc.restore(split_text, puncs)
+#     print(restored_text)
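
The `Punctuation` helper added above strips punctuation before phonemization and restores it afterwards; `BasePhonemizer` relies on exactly this round trip. A short sketch restating the flow from the class docstring (editorial illustration only; the import path is assumed from the file listing):

```python
# Editorial sketch, not part of the diff. Mirrors the Punctuation docstring example.
from xinference.thirdparty.melo.text.es_phonemizer.punctuation import Punctuation

punc = Punctuation()                             # uses _DEF_PUNCS by default
print(punc.strip("This is. example !"))          # 'This is example'

parts, punc_map = punc.strip_to_restore("This is. example !")
print(parts)                                     # ['This is', 'example']
print(Punctuation.restore(parts, punc_map)[0])   # 'This is. example !'
```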

@@ -0,0 +1 @@
+dˌaβˈiðkopeɾfjl unθsbmtʃwɛxɪŋʊɣɡrɲʝʎː

@@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ImportError",
+     "evalue": "attempted relative import with no known parent package",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
+      "\u001b[1;32m/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb Cell 1\u001b[0m line \u001b[0;36m5\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\u001b[39m,\u001b[39m \u001b[39msys\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mappend(\u001b[39m'\u001b[39m\u001b[39m/home/xumin/workspace/MyShell-VC-Training/text/es_phonemizer/\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mes_to_ipa\u001b[39;00m \u001b[39mimport\u001b[39;00m es2ipa\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msplit_sentences_en\u001b[39m(text, min_len\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m):\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a>\u001b[0m \u001b[39m# 将文本中的换行符、空格和制表符替换为空格\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a>\u001b[0m text \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\u001b[39m'\u001b[39m\u001b[39m[\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m ]+\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m, text)\n",
+      "File \u001b[0;32m/data/workspace/Bert-VITS2/text/es_phonemizer/es_to_ipa.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mcleaner\u001b[39;00m \u001b[39mimport\u001b[39;00m spanish_cleaners\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mgruut_wrapper\u001b[39;00m \u001b[39mimport\u001b[39;00m Gruut\n\u001b[1;32m 4\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mes2ipa\u001b[39m(text):\n",
+      "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package"
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import os\n",
+    "import os, sys\n",
+    "sys.path.append('/home/xumin/workspace/MyShell-VC-Training/text/es_phonemizer/')\n",
+    "from es_to_ipa import es2ipa\n",
+    "\n",
+    "\n",
+    "\n",
+    "def split_sentences_en(text, min_len=10):\n",
+    "    # 将文本中的换行符、空格和制表符替换为空格\n",
+    "    text = re.sub('[\\n\\t ]+', ' ', text)\n",
+    "    # 在标点符号后添加一个空格\n",
+    "    text = re.sub('([¿—¡])', r'\\1 $#!', text)\n",
+    "    # 分隔句子并去除前后空格\n",
+    "    \n",
+    "    sentences = [s.strip() for s in text.split(' $#!')]\n",
+    "    if len(sentences[-1]) == 0: del sentences[-1]\n",
+    "\n",
+    "    new_sentences = []\n",
+    "    new_sent = []\n",
+    "    for ind, sent in enumerate(sentences):\n",
+    "        if sent in ['¿', '—', '¡']:\n",
+    "            new_sent.append(sent)\n",
+    "        else:\n",
+    "            new_sent.append(es2ipa(sent))\n",
+    "        \n",
+    "    \n",
+    "    new_sentences = ''.join(new_sent)\n",
+    "\n",
+    "    return new_sentences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'—¿aβˈeis estˈaðo kasˈaða alɣˈuna bˈeθ?'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "split_sentences_en('—¿Habéis estado casada alguna vez?')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'aβˈeis estˈaðo kasˈaða alɣˈuna bˈeθ?'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "es2ipa('—¿Habéis estado casada alguna vez?')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

@@ -0,0 +1,140 @@
+import abc
+from typing import List, Tuple
+
+from .punctuation import Punctuation
+
+
+class BasePhonemizer(abc.ABC):
+    """Base phonemizer class
+
+    Phonemization follows the following steps:
+        1. Preprocessing:
+            - remove empty lines
+            - remove punctuation
+            - keep track of punctuation marks
+
+        2. Phonemization:
+            - convert text to phonemes
+
+        3. Postprocessing:
+            - join phonemes
+            - restore punctuation marks
+
+    Args:
+        language (str):
+            Language used by the phonemizer.
+
+        punctuations (List[str]):
+            List of punctuation marks to be preserved.
+
+        keep_puncs (bool):
+            Whether to preserve punctuation marks or not.
+    """
+
+    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
+        # ensure the backend is installed on the system
+        if not self.is_available():
+            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover
+
+        # ensure the backend support the requested language
+        self._language = self._init_language(language)
+
+        # setup punctuation processing
+        self._keep_puncs = keep_puncs
+        self._punctuator = Punctuation(punctuations)
+
+    def _init_language(self, language):
+        """Language initialization
+
+        This method may be overloaded in child classes (see Segments backend)
+
+        """
+        if not self.is_supported_language(language):
+            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
+        return language
+
+    @property
+    def language(self):
+        """The language code configured to be used for phonemization"""
+        return self._language
+
+    @staticmethod
+    @abc.abstractmethod
+    def name():
+        """The name of the backend"""
+        ...
+
+    @classmethod
+    @abc.abstractmethod
+    def is_available(cls):
+        """Returns True if the backend is installed, False otherwise"""
+        ...
+
+    @classmethod
+    @abc.abstractmethod
+    def version(cls):
+        """Return the backend version as a tuple (major, minor, patch)"""
+        ...
+
+    @staticmethod
+    @abc.abstractmethod
+    def supported_languages():
+        """Return a dict of language codes -> name supported by the backend"""
+        ...
+
+    def is_supported_language(self, language):
+        """Returns True if `language` is supported by the backend"""
+        return language in self.supported_languages()
+
+    @abc.abstractmethod
+    def _phonemize(self, text, separator):
+        """The main phonemization method"""
+
+    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
+        """Preprocess the text before phonemization
+
+        1. remove spaces
+        2. remove punctuation
+
+        Override this if you need a different behaviour
+        """
+        text = text.strip()
+        if self._keep_puncs:
+            # a tuple (text, punctuation marks)
+            return self._punctuator.strip_to_restore(text)
+        return [self._punctuator.strip(text)], []
+
+    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
+        """Postprocess the raw phonemized output
+
+        Override this if you need a different behaviour
+        """
+        if self._keep_puncs:
+            return self._punctuator.restore(phonemized, punctuations)[0]
+        return phonemized[0]
+
+    def phonemize(self, text: str, separator="|", language: str = None) -> str:  # pylint: disable=unused-argument
+        """Returns the `text` phonemized for the given language
+
+        Args:
+            text (str):
+                Text to be phonemized.
+
+            separator (str):
+                string separator used between phonemes. Default to '_'.
+
+        Returns:
+            (str): Phonemized text
+        """
+        text, punctuations = self._phonemize_preprocess(text)
+        phonemized = []
+        for t in text:
+            p = self._phonemize(t, separator)
+            phonemized.append(p)
+        phonemized = self._phonemize_postprocess(phonemized, punctuations)
+        return phonemized
+
+    def print_logs(self, level: int = 0):
+        indent = "\t" * level
+        print(f"{indent}| > phoneme language: {self.language}")
+        print(f"{indent}| > phoneme backend: {self.name()}")