xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +400 -3
- xinference/client/restful/async_restful_client.py +20 -3
- xinference/client/restful/restful_client.py +20 -3
- xinference/constants.py +2 -0
- xinference/core/supervisor.py +111 -49
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +26 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/kokoro.py +1 -1
- xinference/model/audio/kokoro_zh.py +124 -0
- xinference/model/audio/model_spec.json +58 -1
- xinference/model/embedding/sentence_transformers/core.py +4 -4
- xinference/model/embedding/vllm/core.py +7 -1
- xinference/model/image/model_spec.json +71 -3
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +4 -0
- xinference/model/llm/core.py +10 -0
- xinference/model/llm/llama_cpp/core.py +1 -0
- xinference/model/llm/llm_family.json +503 -21
- xinference/model/llm/llm_family.py +1 -0
- xinference/model/llm/mlx/core.py +52 -33
- xinference/model/llm/sglang/core.py +32 -55
- xinference/model/llm/tool_parsers/__init__.py +58 -0
- xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
- xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
- xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
- xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
- xinference/model/llm/transformers/core.py +1 -1
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/utils.py +138 -53
- xinference/model/llm/vllm/core.py +95 -78
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/types.py +105 -2
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import argbind
|
|
4
|
+
from audiotools import ml
|
|
5
|
+
|
|
6
|
+
import indextts.s2mel.dac as dac
|
|
7
|
+
|
|
8
|
+
DAC = dac.model.DAC
|
|
9
|
+
Accelerator = ml.Accelerator
|
|
10
|
+
|
|
11
|
+
# Release tag that the "latest" alias resolves to, keyed by
# (model_type, model_bitrate).
__MODEL_LATEST_TAGS__ = {
    ("44khz", "8kbps"): "0.0.1",
    ("24khz", "8kbps"): "0.0.4",
    ("16khz", "8kbps"): "0.0.5",
    ("44khz", "16kbps"): "1.0.0",
}

# Download URL of each published weights file, keyed by
# (model_type, tag, model_bitrate). Note the key order differs from
# __MODEL_LATEST_TAGS__: the tag sits between the type and the bitrate.
__MODEL_URLS__ = {
    ("44khz", "0.0.1", "8kbps"): "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.1/weights.pth",
    ("24khz", "0.0.4", "8kbps"): "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.4/weights_24khz.pth",
    ("16khz", "0.0.5", "8kbps"): "https://github.com/descriptinc/descript-audio-codec/releases/download/0.0.5/weights_16khz.pth",
    ("44khz", "1.0.0", "16kbps"): "https://github.com/descriptinc/descript-audio-codec/releases/download/1.0.0/weights_44khz_16kbps.pth",
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@argbind.bind(group="download", positional=True, without_prefix=True)
def download(
    model_type: str = "44khz", model_bitrate: str = "8kbps", tag: str = "latest"
):
    """
    Return the local path of a weights file, downloading it on a cache miss.

    The file is cached under ``~/.cache/descript/dac`` with a name built from
    the model type, bitrate and resolved tag. If that file already exists no
    network access is made.

    Parameters
    ----------
    model_type : str
        The type of model to download. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz".
    model_bitrate: str
        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
        Only 44khz model supports 16kbps.
    tag : str
        The tag of the model to download. Defaults to "latest".

    Returns
    -------
    Path
        Path of the cached weights file.

    Raises
    ------
    AssertionError
        If ``model_type`` or ``model_bitrate`` is not one of the accepted values.
    ValueError
        If no weights are published for the requested combination, or if the
        server answers with a status code other than 200.
    """
    model_type = model_type.lower()
    # Normalise the bitrate the same way as the other two string arguments.
    model_bitrate = model_bitrate.lower()
    tag = tag.lower()

    assert model_type in [
        "44khz",
        "24khz",
        "16khz",
    ], "model_type must be one of '44khz', '24khz', or '16khz'"

    assert model_bitrate in [
        "8kbps",
        "16kbps",
    ], "model_bitrate must be one of '8kbps', or '16kbps'"

    if tag == "latest":
        # Not every (type, bitrate) pair has a release. Keep "latest" when
        # the pair is unknown so the URL lookup below reports it with the
        # regular ValueError instead of a bare KeyError.
        tag = __MODEL_LATEST_TAGS__.get((model_type, model_bitrate), tag)

    download_link = __MODEL_URLS__.get((model_type, tag, model_bitrate), None)

    if download_link is None:
        raise ValueError(
            f"Could not find model with tag {tag}, model type {model_type} "
            f"and bitrate {model_bitrate}"
        )

    local_path = (
        Path.home()
        / ".cache"
        / "descript"
        / "dac"
        / f"weights_{model_type}_{model_bitrate}_{tag}.pth"
    )
    if not local_path.exists():
        local_path.parent.mkdir(parents=True, exist_ok=True)

        # Download the model
        import requests

        # Write to a sibling ".part" file and rename it once complete, so an
        # interrupted transfer never leaves a truncated file at the cache
        # path (which later calls would accept as a valid checkpoint).
        partial_path = local_path.with_name(local_path.name + ".part")
        try:
            # stream=True avoids holding the whole checkpoint in memory; the
            # (connect, read) timeout keeps a stalled server from hanging
            # the caller forever.
            with requests.get(
                download_link, stream=True, timeout=(10, 120)
            ) as response:
                if response.status_code != 200:
                    raise ValueError(
                        f"Could not download model. Received response code {response.status_code}"
                    )
                with open(partial_path, "wb") as weights_file:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        weights_file.write(chunk)
            partial_path.replace(local_path)
        finally:
            # Only present if something failed before the rename.
            if partial_path.exists():
                partial_path.unlink()

    return local_path
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def load_model(
    model_type: str = "44khz",
    model_bitrate: str = "8kbps",
    tag: str = "latest",
    load_path: str = None,
):
    """
    Load a DAC model from a weights file.

    Parameters
    ----------
    model_type : str
        Forwarded to ``download`` when ``load_path`` is not given.
    model_bitrate : str
        Forwarded to ``download`` when ``load_path`` is not given.
    tag : str
        Forwarded to ``download`` when ``load_path`` is not given.
    load_path : str, optional
        Path of an existing weights file. Any falsy value (``None`` or an
        empty string) means "fetch the weights with ``download``".

    Returns
    -------
    The object returned by ``DAC.load`` for the chosen weights path.
    """
    # An explicit path wins; otherwise resolve (and cache) the published weights.
    weights_path = load_path or download(
        model_type=model_type, model_bitrate=model_bitrate, tag=tag
    )
    return DAC.load(weights_path)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import argbind
|
|
5
|
+
import numpy as np
|
|
6
|
+
import torch
|
|
7
|
+
from audiotools import AudioSignal
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from dac import DACFile
|
|
11
|
+
from dac.utils import load_model
|
|
12
|
+
|
|
13
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@argbind.bind(group="decode", positional=True, without_prefix=True)
@torch.inference_mode()
@torch.no_grad()
def decode(
    input: str,
    output: str = "",
    weights_path: str = "",
    model_tag: str = "latest",
    model_bitrate: str = "8kbps",
    device: str = "cuda",
    model_type: str = "44khz",
    verbose: bool = False,
):
    """Decode audio from codes.

    Parameters
    ----------
    input : str
        Path to input directory or file
    output : str, optional
        Path to output directory, by default "".
        If `input` is a directory, the directory sub-tree relative to `input` is re-created in `output`.
    weights_path : str, optional
        Path to weights file, by default "". If not specified, the weights file will be downloaded from the internet using the
        model_tag and model_type.
    model_tag : str, optional
        Tag of the model to use, by default "latest". Ignored if `weights_path` is specified.
    model_bitrate: str
        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
    device : str, optional
        Device to use, by default "cuda". If "cpu", the model will be loaded on the CPU.
    model_type : str, optional
        The type of model to use. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz". Ignored if `weights_path` is specified.
    verbose : bool, optional
        Forwarded to the generator's `decompress` call, by default False.
    """
    generator = load_model(
        model_type=model_type,
        model_bitrate=model_bitrate,
        tag=model_tag,
        load_path=weights_path,
    )
    generator.to(device)
    generator.eval()

    # Collect every .dac file below the input path; a single .dac file given
    # directly is not matched by the glob, so it is appended explicitly.
    source = Path(input)
    dac_files = list(source.glob("**/*.dac"))
    if source.suffix == ".dac":
        dac_files.append(source)

    # Create output directory
    output = Path(output)
    output.mkdir(parents=True, exist_ok=True)

    for dac_file in tqdm(dac_files, desc="Decoding files"):
        # Load the codes and reconstruct audio from them
        artifact = DACFile.load(dac_file)
        recons = generator.decompress(artifact, verbose=verbose)

        # Mirror the input sub-tree inside the output directory. When the
        # input itself is the file, the relative path has an empty name and
        # the result goes straight into the output directory.
        relative_path = dac_file.relative_to(input)
        if relative_path.name:
            target_dir = output / relative_path.parent
        else:
            target_dir = output
            relative_path = dac_file
        output_path = target_dir / relative_path.with_suffix(".wav").name
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to file
        recons.write(output_path)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Script entry point: parse the command line with argbind, then call decode()
# inside the resulting scope. decode() is invoked without arguments, so its
# parameters are expected to be supplied by argbind from the parsed values.
if __name__ == "__main__":
    args = argbind.parse_args()
    with argbind.scope(args):
        decode()
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import math
|
|
2
|
+
import warnings
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import argbind
|
|
6
|
+
import numpy as np
|
|
7
|
+
import torch
|
|
8
|
+
from audiotools import AudioSignal
|
|
9
|
+
from audiotools.core import util
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from dac.utils import load_model
|
|
13
|
+
|
|
14
|
+
# Install an "ignore" filter for every UserWarning as soon as this module is imported.
warnings.filterwarnings("ignore", category=UserWarning)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@argbind.bind(group="encode", positional=True, without_prefix=True)
@torch.inference_mode()
@torch.no_grad()
def encode(
    input: str,
    output: str = "",
    weights_path: str = "",
    model_tag: str = "latest",
    model_bitrate: str = "8kbps",
    n_quantizers: int = None,
    device: str = "cuda",
    model_type: str = "44khz",
    win_duration: float = 5.0,
    verbose: bool = False,
):
    """Encode audio files in input path to .dac format.

    Parameters
    ----------
    input : str
        Path to input audio file or directory
    output : str, optional
        Path to output directory, by default "". If `input` is a directory, the directory sub-tree relative to `input` is re-created in `output`.
    weights_path : str, optional
        Path to weights file, by default "". If not specified, the weights file will be downloaded from the internet using the
        model_tag and model_type.
    model_tag : str, optional
        Tag of the model to use, by default "latest". Ignored if `weights_path` is specified.
    model_bitrate: str
        Bitrate of the model. Must be one of "8kbps", or "16kbps". Defaults to "8kbps".
    n_quantizers : int, optional
        Number of quantizers to use, by default None. If not specified, all the quantizers will be used and the model will compress at maximum bitrate.
    device : str, optional
        Device to use, by default "cuda"
    model_type : str, optional
        The type of model to use. Must be one of "44khz", "24khz", or "16khz". Defaults to "44khz". Ignored if `weights_path` is specified.
    win_duration : float, optional
        Passed positionally to the generator's `compress` call, by default 5.0.
    verbose : bool, optional
        Forwarded to the generator's `compress` call, by default False.
    """
    generator = load_model(
        model_type=model_type,
        model_bitrate=model_bitrate,
        tag=model_tag,
        load_path=weights_path,
    )
    generator.to(device)
    generator.eval()

    # Find all audio files in input path
    input = Path(input)
    audio_files = util.find_audio(input)

    output = Path(output)
    output.mkdir(parents=True, exist_ok=True)

    for audio_file in tqdm(audio_files, desc="Encoding files"):
        # Load file and encode it to .dac format
        signal = AudioSignal(audio_file)
        artifact = generator.compress(
            signal, win_duration, verbose=verbose, n_quantizers=n_quantizers
        )

        # Mirror the input sub-tree inside the output directory. When the
        # input itself is the file, the relative path has an empty name and
        # the result goes straight into the output directory.
        relative_path = audio_file.relative_to(input)
        if relative_path.name:
            target_dir = output / relative_path.parent
        else:
            target_dir = output
            relative_path = audio_file
        output_path = target_dir / relative_path.with_suffix(".dac").name
        output_path.parent.mkdir(parents=True, exist_ok=True)

        artifact.save(output_path)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Script entry point: parse the command line with argbind, then call encode()
# inside the resulting scope. encode() is invoked without arguments, so its
# parameters are expected to be supplied by argbind from the parsed values.
if __name__ == "__main__":
    args = argbind.parse_args()
    with argbind.scope(args):
        encode()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from huggingface_hub import hf_hub_download
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def load_custom_model_from_hf(
    repo_id,
    model_filename="pytorch_model.bin",
    config_filename="config.yml",
    cache_dir="./checkpoints",
):
    """Download a model file (and optionally its config) from the Hugging Face Hub.

    Parameters
    ----------
    repo_id
        Repository identifier passed to ``hf_hub_download``.
    model_filename
        Name of the weights file inside the repository.
    config_filename
        Name of the config file inside the repository, or ``None`` to skip
        the config download.
    cache_dir
        Directory used as the download cache. It is created if missing.
        Defaults to ``"./checkpoints"``, which is relative to the current
        working directory.

    Returns
    -------
    ``model_path`` alone when ``config_filename`` is ``None``; otherwise the
    tuple ``(model_path, config_path)``. Callers must handle both shapes.
    """
    os.makedirs(cache_dir, exist_ok=True)
    model_path = hf_hub_download(
        repo_id=repo_id, filename=model_filename, cache_dir=cache_dir
    )
    if config_filename is None:
        return model_path

    config_path = hf_hub_download(
        repo_id=repo_id, filename=config_filename, cache_dir=cache_dir
    )
    return model_path, config_path
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
|
2
|
+
|
|
3
|
+
import torch.nn as nn
|
|
4
|
+
from .resample import UpSample1d, DownSample1d
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Activation1d(nn.Module):
    """Run an activation between an ``UpSample1d`` and a ``DownSample1d`` stage.

    Parameters
    ----------
    activation
        Callable applied to the upsampled signal.
    up_ratio, down_ratio : int
        Ratios handed to ``UpSample1d`` and ``DownSample1d`` respectively.
    up_kernel_size, down_kernel_size : int
        Kernel sizes handed to ``UpSample1d`` and ``DownSample1d`` respectively.
    """

    def __init__(
        self,
        activation,
        up_ratio: int = 2,
        down_ratio: int = 2,
        up_kernel_size: int = 12,
        down_kernel_size: int = 12,
    ):
        super().__init__()
        self.up_ratio, self.down_ratio = up_ratio, down_ratio
        self.act = activation
        self.upsample = UpSample1d(up_ratio, up_kernel_size)
        self.downsample = DownSample1d(down_ratio, down_kernel_size)

    def forward(self, x):
        """Upsample ``x`` ([B,C,T]), apply the activation, then downsample."""
        return self.downsample(self.act(self.upsample(x)))
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
import torch.nn as nn
|
|
5
|
+
import torch.nn.functional as F
|
|
6
|
+
import math
|
|
7
|
+
|
|
8
|
+
# Prefer the native implementation when this torch build provides one.
if hasattr(torch, "sinc"):
    sinc = torch.sinc
else:
    # This code is adopted from adefossez's julius.core.sinc under the MIT License
    # https://adefossez.github.io/julius/julius/core.html
    def sinc(x: torch.Tensor):
        """
        Implementation of sinc, i.e. sin(pi * x) / (pi * x)
        __Warning__: Different to julius.sinc, the input is multiplied by `pi`!
        """
        # The quotient is undefined at x == 0; those positions are replaced by 1.
        unity = torch.tensor(1.0, device=x.device, dtype=x.dtype)
        quotient = torch.sin(math.pi * x) / math.pi / x
        return torch.where(x == 0, unity, quotient)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# This code is adopted from adefossez's julius.lowpass.LowPassFilters under the MIT License
|
|
26
|
+
# https://adefossez.github.io/julius/julius/lowpass.html
|
|
27
|
+
def kaiser_sinc_filter1d(
    cutoff, half_width, kernel_size
):  # return filter [1,1,kernel_size]
    """Build a Kaiser-windowed sinc low-pass kernel.

    Parameters
    ----------
    cutoff
        Cutoff used to scale the sinc. A value of 0 yields an all-zero kernel.
    half_width
        Transition half-width; it determines the Kaiser window's ``beta``.
    kernel_size
        Number of taps. Even and odd sizes are both supported.

    Returns
    -------
    torch.Tensor
        Floating-point tensor of shape ``[1, 1, kernel_size]``. For a non-zero
        cutoff the taps are normalized to sum to 1.
    """
    is_even = kernel_size % 2 == 0
    half_size = kernel_size // 2

    # For kaiser window
    delta_f = 4 * half_width
    A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95
    if A > 50.0:
        beta = 0.1102 * (A - 8.7)
    elif A >= 21.0:
        beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21.0)
    else:
        beta = 0.0
    window = torch.kaiser_window(kernel_size, beta=beta, periodic=False)

    if cutoff == 0:
        # Zero cutoff: return an all-zero kernel. It is created from the float
        # window rather than the time grid, which is integer-typed for odd
        # kernel sizes, and it is not normalized (the sum would be zero).
        filter_ = torch.zeros_like(window)
    else:
        # ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio
        if is_even:
            time = torch.arange(-half_size, half_size) + 0.5
        else:
            time = torch.arange(kernel_size) - half_size
        filter_ = 2 * cutoff * window * sinc(2 * cutoff * time)
        # Normalize filter to have sum = 1, otherwise we will have a small leakage
        # of the constant component in the input signal.
        filter_ /= filter_.sum()

    return filter_.view(1, 1, kernel_size)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class LowPassFilter1d(nn.Module):
    """Depthwise 1-D low-pass filter using a kernel from ``kaiser_sinc_filter1d``.

    Parameters
    ----------
    cutoff
        Must lie in ``[0, 0.5]``; values outside raise ``ValueError``.
    half_width
        Passed through to ``kaiser_sinc_filter1d``.
    stride : int
        Stride of the convolution.
    padding : bool
        When true the input is padded before filtering.
    padding_mode : str
        Mode handed to ``F.pad``.
    kernel_size : int
        Number of filter taps.
    """

    def __init__(
        self,
        cutoff=0.5,
        half_width=0.6,
        stride: int = 1,
        padding: bool = True,
        padding_mode: str = "replicate",
        kernel_size: int = 12,
    ):
        # kernel_size should be even number for stylegan3 setup,
        # in this implementation, odd number is also possible.
        super().__init__()
        # Only negative cutoffs are rejected here; zero itself is accepted.
        if cutoff < 0.0:
            raise ValueError("Minimum cutoff must be larger than zero.")
        if cutoff > 0.5:
            raise ValueError("A cutoff above 0.5 does not make sense.")

        half = kernel_size // 2
        is_even = kernel_size % 2 == 0
        self.kernel_size = kernel_size
        self.even = is_even
        # An even kernel pads one sample less on the left than on the right.
        self.pad_left = half - int(is_even)
        self.pad_right = half
        self.stride = stride
        self.padding = padding
        self.padding_mode = padding_mode
        self.register_buffer(
            "filter", kaiser_sinc_filter1d(cutoff, half_width, kernel_size)
        )

    def forward(self, x):
        """Filter ``x`` ([B, C, T]) with the same kernel on every channel."""
        _, channels, _ = x.shape

        if self.padding:
            x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode)
        # One kernel copy per channel, applied with groups == channels.
        kernel = self.filter.expand(channels, -1, -1)
        return F.conv1d(x, kernel, stride=self.stride, groups=channels)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
|
2
|
+
|
|
3
|
+
import torch.nn as nn
|
|
4
|
+
from torch.nn import functional as F
|
|
5
|
+
from .filter import LowPassFilter1d
|
|
6
|
+
from .filter import kaiser_sinc_filter1d
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class UpSample1d(nn.Module):
    """Upsample a [B, C, T] signal with a transposed convolution and a Kaiser-sinc kernel.

    Parameters
    ----------
    ratio
        Upsampling factor; also used as the transposed-convolution stride.
    kernel_size
        Number of filter taps. When ``None`` it defaults to
        ``int(6 * ratio // 2) * 2``.
    """

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        if kernel_size is None:
            self.kernel_size = int(6 * ratio // 2) * 2
        else:
            self.kernel_size = kernel_size
        self.stride = ratio
        self.pad = self.kernel_size // ratio - 1

        # Number of samples trimmed from each end of the transposed-conv output.
        base_trim = self.pad * self.stride
        overhang = self.kernel_size - self.stride
        self.pad_left = base_trim + overhang // 2
        self.pad_right = base_trim + (overhang + 1) // 2

        kernel = kaiser_sinc_filter1d(
            cutoff=0.5 / ratio, half_width=0.6 / ratio, kernel_size=self.kernel_size
        )
        self.register_buffer("filter", kernel)

    def forward(self, x):
        """Upsample ``x`` ([B, C, T]) channel by channel."""
        _, channels, _ = x.shape

        padded = F.pad(x, (self.pad, self.pad), mode="replicate")
        upsampled = self.ratio * F.conv_transpose1d(
            padded,
            self.filter.expand(channels, -1, -1),
            stride=self.stride,
            groups=channels,
        )
        return upsampled[..., self.pad_left : -self.pad_right]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DownSample1d(nn.Module):
    """Downsample by running the input through a strided ``LowPassFilter1d``.

    Parameters
    ----------
    ratio
        Downsampling factor; used as the low-pass filter's stride.
    kernel_size
        Number of filter taps. When ``None`` it defaults to
        ``int(6 * ratio // 2) * 2``.
    """

    def __init__(self, ratio=2, kernel_size=None):
        super().__init__()
        self.ratio = ratio
        if kernel_size is None:
            self.kernel_size = int(6 * ratio // 2) * 2
        else:
            self.kernel_size = kernel_size
        self.lowpass = LowPassFilter1d(
            cutoff=0.5 / ratio,
            half_width=0.6 / ratio,
            stride=ratio,
            kernel_size=self.kernel_size,
        )

    def forward(self, x):
        """Return the low-pass filtered (and thereby strided) version of ``x``."""
        return self.lowpass(x)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import torch
|
|
3
|
+
import torch.utils.data
|
|
4
|
+
from librosa.filters import mel as librosa_mel_fn
|
|
5
|
+
from scipy.io.wavfile import read
|
|
6
|
+
|
|
7
|
+
# 32768 == 2**15; presumably the full-scale magnitude of 16-bit PCM samples.
# NOTE(review): not referenced in the visible part of this module - confirm usage.
MAX_WAV_VALUE = 32768.0
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_wav(full_path):
    """Read a wav file and return ``(data, sampling_rate)``.

    Note the order: the underlying reader yields the rate first, while this
    helper returns the samples first.
    """
    rate, samples = read(full_path)
    return samples, rate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` computed with NumPy.

    Values below ``clip_val`` are raised to it before scaling, which keeps the
    logarithm finite.
    """
    floored = np.clip(x, a_min=clip_val, a_max=None)
    return np.log(floored * C)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def dynamic_range_decompression(x, C=1):
    """Return ``exp(x) / C`` computed with NumPy."""
    expanded = np.exp(x)
    return expanded / C
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` computed with torch.

    Values below ``clip_val`` are clamped up to it before scaling, which keeps
    the logarithm finite.
    """
    floored = torch.clamp(x, min=clip_val)
    return torch.log(floored * C)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def dynamic_range_decompression_torch(x, C=1):
    """Return ``exp(x) / C`` computed with torch."""
    expanded = torch.exp(x)
    return expanded / C
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def spectral_normalize_torch(magnitudes):
    """Apply ``dynamic_range_compression_torch`` with its default arguments."""
    return dynamic_range_compression_torch(magnitudes)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def spectral_de_normalize_torch(magnitudes):
    """Apply ``dynamic_range_decompression_torch`` with its default arguments."""
    return dynamic_range_decompression_torch(magnitudes)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Module-level cache of mel filterbank tensors, filled lazily by mel_spectrogram
# under a string key that function builds from its arguments.
mel_basis = {}
|
|
42
|
+
# Module-level cache of Hann window tensors, filled lazily by mel_spectrogram
# under a string key that function builds from its arguments.
hann_window = {}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
    """Compute a log-compressed mel spectrogram of ``y``.

    The mel filterbank and the Hann window are cached in the module-level
    ``mel_basis`` and ``hann_window`` dicts. Each cache entry is keyed on every
    argument that determines its value (plus the device), so calls with
    different STFT settings never reuse a stale filterbank or window.

    Parameters
    ----------
    y
        Input waveform tensor; presumably shaped (batch, samples), since it is
        given a channel axis for padding and squeezed back afterwards.
        TODO confirm against callers.
    n_fft
        FFT size for the STFT and the mel filterbank.
    num_mels
        Number of mel bands.
    sampling_rate
        Sampling rate used to build the mel filterbank.
    hop_size
        STFT hop length.
    win_size
        STFT window length.
    fmin, fmax
        Frequency range of the mel filterbank.
    center
        Forwarded to ``torch.stft``.

    Returns
    -------
    torch.Tensor
        Result of ``spectral_normalize_torch`` applied to the mel-projected
        magnitude spectrogram.
    """
    device = str(y.device)
    mel_key = f"{n_fft}_{num_mels}_{sampling_rate}_{fmin}_{fmax}_{device}"
    window_key = f"{win_size}_{device}"

    if mel_key not in mel_basis:
        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
        mel_basis[mel_key] = torch.from_numpy(mel).float().to(y.device)
    # The window is cached independently: it depends only on win_size and device.
    if window_key not in hann_window:
        hann_window[window_key] = torch.hann_window(win_size).to(y.device)

    # Reflect-pad both ends by (n_fft - hop_size) / 2 samples before the STFT.
    pad = int((n_fft - hop_size) / 2)
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode="reflect")
    y = y.squeeze(1)

    spec = torch.view_as_real(
        torch.stft(
            y,
            n_fft,
            hop_length=hop_size,
            win_length=win_size,
            window=hann_window[window_key],
            center=center,
            pad_mode="reflect",
            normalized=False,
            onesided=True,
            return_complex=True,
        )
    )

    # Magnitude from the real/imaginary pair; the epsilon keeps sqrt away from zero.
    spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))

    spec = torch.matmul(mel_basis[mel_key], spec)
    spec = spectral_normalize_torch(spec)

    return spec
|