xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (334) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +400 -3
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/constants.py +2 -0
  6. xinference/core/supervisor.py +111 -49
  7. xinference/core/worker.py +10 -0
  8. xinference/deploy/cmdline.py +15 -0
  9. xinference/model/audio/core.py +26 -6
  10. xinference/model/audio/indextts2.py +166 -0
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +58 -1
  14. xinference/model/embedding/sentence_transformers/core.py +4 -4
  15. xinference/model/embedding/vllm/core.py +7 -1
  16. xinference/model/image/model_spec.json +71 -3
  17. xinference/model/image/stable_diffusion/core.py +13 -4
  18. xinference/model/llm/__init__.py +4 -0
  19. xinference/model/llm/core.py +10 -0
  20. xinference/model/llm/llama_cpp/core.py +1 -0
  21. xinference/model/llm/llm_family.json +503 -21
  22. xinference/model/llm/llm_family.py +1 -0
  23. xinference/model/llm/mlx/core.py +52 -33
  24. xinference/model/llm/sglang/core.py +32 -55
  25. xinference/model/llm/tool_parsers/__init__.py +58 -0
  26. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  27. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +190 -0
  28. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  29. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  30. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  31. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  32. xinference/model/llm/transformers/core.py +1 -1
  33. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  34. xinference/model/llm/utils.py +138 -53
  35. xinference/model/llm/vllm/core.py +95 -78
  36. xinference/thirdparty/audiotools/__init__.py +10 -0
  37. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  38. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  39. xinference/thirdparty/audiotools/core/display.py +194 -0
  40. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  41. xinference/thirdparty/audiotools/core/effects.py +647 -0
  42. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  43. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  44. xinference/thirdparty/audiotools/core/playback.py +252 -0
  45. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  46. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  47. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  48. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  49. xinference/thirdparty/audiotools/core/util.py +671 -0
  50. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  51. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  52. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  53. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  54. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  55. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  56. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  57. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  58. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  59. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  60. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  61. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  62. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  63. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  64. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  65. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  66. xinference/thirdparty/audiotools/post.py +140 -0
  67. xinference/thirdparty/audiotools/preference.py +600 -0
  68. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  69. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  70. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  72. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  73. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  74. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  75. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  76. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  77. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  78. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  79. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  80. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  81. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  82. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  83. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  84. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  85. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  86. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  87. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  88. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  89. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  90. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  91. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  92. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  93. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  94. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  95. xinference/thirdparty/indextts/__init__.py +0 -0
  96. xinference/thirdparty/indextts/cli.py +65 -0
  97. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  98. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  99. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  100. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  101. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  102. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  103. xinference/thirdparty/indextts/gpt/model.py +713 -0
  104. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  105. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  106. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  107. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  108. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  109. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  110. xinference/thirdparty/indextts/infer.py +690 -0
  111. xinference/thirdparty/indextts/infer_v2.py +739 -0
  112. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  113. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  114. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  115. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  116. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  117. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  118. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  119. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  120. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  121. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  122. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  123. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  124. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  125. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  126. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  127. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  128. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  129. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  130. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  131. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  133. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  134. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  135. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  136. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  137. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  138. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  139. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  140. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  141. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  142. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  143. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  144. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  145. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  146. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  147. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  148. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  149. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  150. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  151. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  152. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  153. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  154. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  155. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  156. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  157. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  158. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  159. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  160. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  161. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  162. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  163. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  164. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  165. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  166. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  167. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  168. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  169. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  170. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  171. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  172. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  173. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  174. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  175. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  176. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  177. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  178. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  179. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  180. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  181. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  182. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  183. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  184. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  185. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  186. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  187. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  188. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  189. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  190. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  191. xinference/thirdparty/indextts/utils/common.py +121 -0
  192. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  193. xinference/thirdparty/indextts/utils/front.py +536 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  240. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  241. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  242. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  243. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  244. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  245. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  246. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  247. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  248. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  249. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  250. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  251. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  252. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  253. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  254. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  255. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  256. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  257. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  258. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  259. xinference/thirdparty/indextts/utils/utils.py +93 -0
  260. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  261. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  262. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  263. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  264. xinference/types.py +105 -2
  265. xinference/ui/gradio/media_interface.py +66 -8
  266. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  267. xinference/ui/web/ui/build/index.html +1 -1
  268. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  269. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  270. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  271. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  272. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  273. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  274. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  275. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  276. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  277. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  278. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  279. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  280. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  281. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  282. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  283. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  284. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  285. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  290. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  291. xinference/ui/web/ui/package-lock.json +0 -34
  292. xinference/ui/web/ui/package.json +0 -1
  293. xinference/ui/web/ui/src/locales/en.json +9 -3
  294. xinference/ui/web/ui/src/locales/ja.json +9 -3
  295. xinference/ui/web/ui/src/locales/ko.json +9 -3
  296. xinference/ui/web/ui/src/locales/zh.json +9 -3
  297. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/METADATA +24 -4
  298. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/RECORD +302 -76
  299. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  300. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  301. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  302. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  303. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  304. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  305. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  306. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  307. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  308. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  309. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  310. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  311. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  312. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  313. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  314. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  315. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  316. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  317. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  318. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  319. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  320. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  321. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  322. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  323. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  324. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  325. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  326. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  327. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  328. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  329. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  330. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  331. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  332. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  333. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  334. {xinference-1.9.1.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ import json
2
+ import shlex
3
+ import subprocess
4
+ import tempfile
5
+ from pathlib import Path
6
+ from typing import Tuple
7
+
8
+ import ffmpy
9
+ import numpy as np
10
+ import torch
11
+
12
+
13
def r128stats(filepath: str, quiet: bool):
    """Takes a path to an audio file, returns a dict with the loudness
    stats computed by the ffmpeg ebur128 filter.

    Parameters
    ----------
    filepath : str
        Path to compute loudness stats on.
    quiet : bool
        When True, ``-hide_banner`` is passed so ffmpeg omits its
        startup banner.

    Returns
    -------
    dict
        Dictionary containing loudness stats, with float values under
        the keys ``"I"``, ``"I Threshold"``, ``"LRA"``,
        ``"LRA Threshold"``, ``"LRA Low"`` and ``"LRA High"``.

    Raises
    ------
    ValueError
        If ffmpeg's stderr contains no ``Summary:`` section (for example
        because the input could not be read). The message carries the
        exit code and the tail of ffmpeg's output.
    """
    ffargs = [
        "ffmpeg",
        "-nostats",
        "-i",
        filepath,
        "-filter_complex",
        "ebur128",
        "-f",
        "null",
        "-",
    ]
    if quiet:
        ffargs += ["-hide_banner"]

    # The ebur128 filter reports its measurements on stderr; stdout is
    # deliberately left untouched, as before.
    proc = subprocess.run(ffargs, stderr=subprocess.PIPE, universal_newlines=True)
    stats = proc.stderr

    # Only the last summary matters: the filter also logs per-frame values.
    summary_index = stats.rfind("Summary:")
    if summary_index < 0:
        # Without this guard the lookups below fail with an opaque
        # "'I:' is not in list"; surface what ffmpeg actually said instead.
        raise ValueError(
            f"ffmpeg (exit code {proc.returncode}) produced no ebur128 "
            f"summary for {filepath!r}: {stats.strip()[-500:]}"
        )

    summary_list = stats[summary_index:].split()

    def value_after(label: str, offset: int) -> float:
        # Values are located by their position relative to a label token,
        # e.g. "I: -23.0 LUFS Threshold: -33.0 LUFS".
        return float(summary_list[summary_list.index(label) + offset])

    return {
        "I": value_after("I:", 1),
        "I Threshold": value_after("I:", 4),
        "LRA": value_after("LRA:", 1),
        "LRA Threshold": value_after("LRA:", 4),
        "LRA Low": value_after("low:", 1),
        "LRA High": value_after("high:", 1),
    }
63
+
64
+
65
def ffprobe_offset_and_codec(path: str) -> Tuple[float, str]:
    """Probe ``path`` with ffprobe and describe its first audio stream.

    Returns a ``(start_time_seconds, codec_name)`` pair for the first
    stream whose ``codec_type`` is ``"audio"``. The start time defaults
    to ``0.0`` when the stream reports none; when no audio stream is
    found at all the result is ``(0.0, None)``.
    """
    probe = ffmpy.FFprobe(
        inputs={path: None},
        global_options="-show_entries format=start_time:stream=duration,start_time,codec_type,codec_name,start_pts,time_base -of json -v quiet",
    )
    probe_output = probe.run(stdout=subprocess.PIPE)
    stream_infos = json.loads(probe_output[0])["streams"]

    # Only the first audio stream is of interest; later ones are ignored.
    first_audio = next(
        (info for info in stream_infos if info["codec_type"] == "audio"),
        None,
    )
    if first_audio is None:
        return float(0.0), None

    start_offset = first_audio.get("start_time", 0.0)
    codec_name = first_audio.get("codec_name")
    return float(start_offset), codec_name
85
+
86
+
87
class FFMPEGMixin:
    """Mixin providing FFMPEG-backed loudness, resampling and loading.

    NOTE(review): the methods use ``self.batch_size``, ``self.sample_rate``,
    ``self[i]``, ``self.write`` and ``self.loudness``, which are not defined
    here — presumably supplied by the AudioSignal class this is mixed into;
    confirm against the host class.
    """

    # Cached loudness values; overwritten by ``ffmpeg_loudness``.
    _loudness = None

    def ffmpeg_loudness(self, quiet: bool = True):
        """Computes loudness of audio file using FFMPEG.

        Parameters
        ----------
        quiet : bool, optional
            Forwarded to ``r128stats``, by default True

        Returns
        -------
        torch.Tensor
            Loudness of every item in the batch, computed via
            FFMPEG.
        """
        loudness = []

        # One temp file is reused: each batch item is written to it and
        # measured before the next item overwrites it.
        with tempfile.NamedTemporaryFile(suffix=".wav") as f:
            for i in range(self.batch_size):
                self[i].write(f.name)
                loudness_stats = r128stats(f.name, quiet=quiet)
                loudness.append(loudness_stats["I"])

        self._loudness = torch.from_numpy(np.array(loudness)).float()
        return self.loudness()

    def ffmpeg_resample(self, sample_rate: int, quiet: bool = True):
        """Resamples AudioSignal using FFMPEG. More memory-efficient
        than using julius.resample for long audio files.

        Parameters
        ----------
        sample_rate : int
            Sample rate to resample to.
        quiet : bool, optional
            When True, ffmpeg is run with ``-hide_banner -loglevel error``,
            by default True

        Returns
        -------
        AudioSignal
            Resampled AudioSignal, or ``self`` unchanged when it is
            already at ``sample_rate``.

        Raises
        ------
        subprocess.CalledProcessError
            If ffmpeg exits with a non-zero status.
        """
        if sample_rate == self.sample_rate:
            return self

        # Imported only once real work is needed, so the no-op path above
        # does not depend on the package being importable.
        from audiotools import AudioSignal

        with tempfile.NamedTemporaryFile(suffix=".wav") as f:
            self.write(f.name)
            # Swap only the extension. A plain str.replace("wav", ...) would
            # also rewrite "wav" inside directory names or the random stem.
            out_path = Path(f.name).with_suffix(".rs.wav")
            f_out = str(out_path)
            # An argument list (not a split string) keeps paths containing
            # whitespace intact.
            command = ["ffmpeg", "-i", f.name, "-ar", str(sample_rate), f_out]
            if quiet:
                command += ["-hide_banner", "-loglevel", "error"]
            try:
                subprocess.check_call(command)
                resampled = AudioSignal(f_out)
            finally:
                # Remove the intermediate file even if ffmpeg or loading failed.
                if out_path.exists():
                    out_path.unlink()
        return resampled

    @classmethod
    def load_from_file_with_ffmpeg(cls, audio_path: str, quiet: bool = True, **kwargs):
        """Loads AudioSignal object after decoding it to a wav file using FFMPEG.
        Useful for loading audio that isn't covered by librosa's loading mechanism. Also
        useful for loading mp3 files, without any offset.

        Parameters
        ----------
        audio_path : str
            Path to load AudioSignal from.
        quiet : bool, optional
            When True, ffmpeg is run with ``-loglevel error``,
            by default True
        **kwargs
            Forwarded to the class constructor along with the decoded
            wav path.

        Returns
        -------
        AudioSignal
            AudioSignal loaded from file with FFMPEG.
        """
        audio_path = str(audio_path)
        with tempfile.TemporaryDirectory() as d:
            wav_file = str(Path(d) / "extracted.wav")
            padded_wav = str(Path(d) / "padded.wav")

            global_options = "-y"
            if quiet:
                global_options += " -loglevel error"

            ff = ffmpy.FFmpeg(
                inputs={audio_path: None},
                # For inputs that are m4a (and others?), the input audio can
                # have samples that don't match the sample rate. This aresample
                # option forces ffmpeg to read timing information in the source
                # file instead of assuming constant sample rate.
                #
                # This fixes an issue where an input m4a file might be a
                # different length than the output wav file
                outputs={wav_file: "-af aresample=async=1000"},
                global_options=global_options,
            )
            ff.run()

            # We pad the file using the start time offset in case it's an audio
            # stream starting at some offset in a video container.
            pad, codec = ffprobe_offset_and_codec(audio_path)

            # For mp3s, don't pad files with discrepancies less than 0.027s -
            # it's likely due to codec latency. The amount of latency introduced
            # by mp3 is 1152 samples, which is ~0.0261s at 44.1kHz. So we set
            # the threshold here slightly above that.
            # Source: https://lame.sourceforge.io/tech-FAQ.txt.
            if codec == "mp3" and pad < 0.027:
                pad = 0.0
            ff = ffmpy.FFmpeg(
                inputs={wav_file: None},
                outputs={padded_wav: f"-af 'adelay={pad*1000}:all=true'"},
                global_options=global_options,
            )
            ff.run()

            # Must be constructed inside the ``with`` block: the temporary
            # directory (and padded_wav with it) is deleted on exit.
            signal = cls(padded_wav, **kwargs)

        return signal
@@ -0,0 +1,320 @@
1
+ import copy
2
+
3
+ import julius
4
+ import numpy as np
5
+ import scipy
6
+ import torch
7
+ import torch.nn.functional as F
8
+ import torchaudio
9
+
10
+
11
class Meter(torch.nn.Module):
    """Tensorized version of pyloudnorm.Meter. Works with batched audio tensors.

    All methods of this class operate on audio laid out as
    ``(nb, nt, nch)`` (batch, samples, channels).

    Parameters
    ----------
    rate : int
        Sample rate of audio.
    filter_class : str, optional
        Class of weighting filter used:
        'K-weighting' (default), 'Fenton/Lee 1',
        'Fenton/Lee 2', 'Dash et al.',
        by default "K-weighting"
    block_size : float, optional
        Gating block size in seconds, by default 0.400
    zeros : int, optional
        Number of zeros to use in FIR approximation of
        IIR filters, by default 512
    use_fir : bool, optional
        Whether to use FIR approximation or exact IIR formulation.
        If the audio is on a CUDA device the FIR approximation is always
        used, as it's much faster, by default False
    """

    def __init__(
        self,
        rate: int,
        filter_class: str = "K-weighting",
        block_size: float = 0.400,
        zeros: int = 512,
        use_fir: bool = False,
    ):
        super().__init__()

        # ``rate`` has to be assigned before ``filter_class``: the
        # ``filter_class`` setter reads ``self.rate`` to build the filters.
        self.rate = rate
        self.filter_class = filter_class
        self.block_size = block_size
        self.use_fir = use_fir

        # Per-channel gains; only the first ``nch`` entries are used.
        G = torch.from_numpy(np.array([1.0, 1.0, 1.0, 1.41, 1.41]))
        self.register_buffer("G", G)

        # Compute impulse responses so that filtering is fast via
        # a convolution at runtime, on GPU, unlike lfilter.
        impulse = np.zeros((zeros,))
        impulse[..., 0] = 1.0

        firs = np.zeros((len(self._filters), 1, zeros))
        passband_gain = torch.zeros(len(self._filters))

        for i, (_, filter_stage) in enumerate(self._filters.items()):
            firs[i] = scipy.signal.lfilter(filter_stage.b, filter_stage.a, impulse)
            passband_gain[i] = filter_stage.passband_gain

        # Store the responses time-reversed; ``.copy()`` is needed because
        # torch.from_numpy cannot wrap a negatively-strided view.
        firs = torch.from_numpy(firs[..., ::-1].copy()).float()

        self.register_buffer("firs", firs)
        self.register_buffer("passband_gain", passband_gain)

    def apply_filter_gpu(self, data: torch.Tensor) -> torch.Tensor:
        """Performs FIR approximation of loudness computation.

        Parameters
        ----------
        data : torch.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        torch.Tensor
            Filtered audio data of shape (nb, nt, nch).
        """
        # Data is of shape (nb, nt, nch).
        # Fold channels into the batch: (nb, nch, nt) -> (nb*nch, 1, nt)
        nb, nt, nch = data.shape
        data = data.permute(0, 2, 1)
        data = data.reshape(nb * nch, 1, nt)

        # Each stage is padded on both sides by the FIR length
        pad_length = self.firs.shape[-1]

        # Apply filtering in sequence
        for i in range(self.firs.shape[0]):
            data = F.pad(data, (pad_length, pad_length))
            data = julius.fftconv.fft_conv1d(data, self.firs[i, None, ...])
            data = self.passband_gain[i] * data
            # Crop back to the original number of samples
            data = data[..., 1 : nt + 1]

        # Unfold the channels out of the batch dimension again so the
        # result has the same (nb, nt, nch) layout as ``apply_filter_cpu``.
        # Without this step multi-channel input came back as (nb*nch, nt, 1).
        data = data.reshape(nb, nch, nt)
        data = data.permute(0, 2, 1)
        return data

    def apply_filter_cpu(self, data: torch.Tensor) -> torch.Tensor:
        """Performs IIR formulation of loudness computation.

        Parameters
        ----------
        data : torch.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        torch.Tensor
            Filtered audio data of shape (nb, nt, nch).
        """
        for _, filter_stage in self._filters.items():
            passband_gain = filter_stage.passband_gain

            a_coeffs = torch.from_numpy(filter_stage.a).float().to(data.device)
            b_coeffs = torch.from_numpy(filter_stage.b).float().to(data.device)

            # lfilter works along the last axis, so move time there and back
            _data = data.permute(0, 2, 1)
            filtered = torchaudio.functional.lfilter(
                _data, a_coeffs, b_coeffs, clamp=False
            )
            data = passband_gain * filtered.permute(0, 2, 1)
        return data

    def apply_filter(self, data: torch.Tensor) -> torch.Tensor:
        """Applies filter on either CPU or GPU, depending
        on if the audio is on GPU or is on CPU, or if
        ``self.use_fir`` is True.

        Parameters
        ----------
        data : torch.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        torch.Tensor
            Filtered audio data of shape (nb, nt, nch).
        """
        if data.is_cuda or self.use_fir:
            data = self.apply_filter_gpu(data)
        else:
            data = self.apply_filter_cpu(data)
        return data

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        """Computes integrated loudness of data.

        Parameters
        ----------
        data : torch.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        torch.Tensor
            Integrated loudness, one value per batch item.
        """
        return self.integrated_loudness(data)

    def _unfold(self, input_data):
        """Slice ``input_data`` (nb, nt, nch) into gating blocks of
        ``block_size`` seconds that overlap by 75%."""
        T_g = self.block_size
        overlap = 0.75  # overlap of 75% of the block duration
        step = 1.0 - overlap  # step size by percentage

        kernel_size = int(T_g * self.rate)
        stride = int(T_g * self.rate * step)
        unfolded = julius.core.unfold(input_data.permute(0, 2, 1), kernel_size, stride)
        unfolded = unfolded.transpose(-1, -2)

        return unfolded

    def integrated_loudness(self, data: torch.Tensor) -> torch.Tensor:
        """Computes integrated loudness of data.

        Parameters
        ----------
        data : torch.Tensor or numpy.ndarray
            Audio data of shape (nb, nt, nch). A 1-D input is treated as
            (nt,) and a 2-D input as (nt, nch); the missing channel and
            batch dimensions are added.

        Returns
        -------
        torch.Tensor
            Integrated loudness as float32, one value per batch item.
        """
        if not torch.is_tensor(data):
            data = torch.from_numpy(data).float()
        else:
            data = data.float()

        input_data = copy.copy(data)
        # Data always has a batch and channel dimension.
        # Is of shape (nb, nt, nch)
        if input_data.ndim < 2:
            input_data = input_data.unsqueeze(-1)
        if input_data.ndim < 3:
            input_data = input_data.unsqueeze(0)

        nb, nt, nch = input_data.shape

        # Apply frequency weighting filters - account
        # for the acoustic response of the head and auditory system
        input_data = self.apply_filter(input_data)

        G = self.G  # channel gains
        T_g = self.block_size  # 400 ms gating block standard
        Gamma_a = -70.0  # -70 LKFS = absolute loudness threshold

        unfolded = self._unfold(input_data)

        # Mean square per channel and gating block, and the loudness of
        # each block (summed over channels, then broadcast back per channel)
        z = (1.0 / (T_g * self.rate)) * unfolded.square().sum(2)
        l = -0.691 + 10.0 * torch.log10((G[None, :nch, None] * z).sum(1, keepdim=True))
        l = l.expand_as(z)

        # find gating block indices above absolute threshold
        # NOTE: ``z_avg_gated`` is the same tensor as ``z`` here, so the
        # masked assignment below zeroes entries of ``z`` in place.
        z_avg_gated = z
        z_avg_gated[l <= Gamma_a] = 0
        masked = l > Gamma_a
        z_avg_gated = z_avg_gated.sum(2) / masked.sum(2)

        # calculate the relative threshold value (see eq. 6)
        Gamma_r = (
            -0.691 + 10.0 * torch.log10((z_avg_gated * G[None, :nch]).sum(-1)) - 10.0
        )
        Gamma_r = Gamma_r[:, None, None]
        Gamma_r = Gamma_r.expand(nb, nch, l.shape[-1])

        # find gating block indices above relative and absolute thresholds (end of eq. 7)
        z_avg_gated = z
        z_avg_gated[l <= Gamma_a] = 0
        z_avg_gated[l <= Gamma_r] = 0
        masked = (l > Gamma_a) * (l > Gamma_r)
        z_avg_gated = z_avg_gated.sum(2) / masked.sum(2)

        # No block passed the gates -> 0/0 above; replace NaN by 0 and clamp
        # infinities to the float32 range.
        # # Cannot use nan_to_num (pytorch 1.8 does not come with GCP-supported cuda version)
        # z_avg_gated = torch.nan_to_num(z_avg_gated)
        z_avg_gated = torch.where(
            z_avg_gated.isnan(), torch.zeros_like(z_avg_gated), z_avg_gated
        )
        z_avg_gated[z_avg_gated == float("inf")] = float(np.finfo(np.float32).max)
        z_avg_gated[z_avg_gated == -float("inf")] = float(np.finfo(np.float32).min)

        LUFS = -0.691 + 10.0 * torch.log10((G[None, :nch] * z_avg_gated).sum(1))
        return LUFS.float()

    @property
    def filter_class(self):
        """Name of the weighting filter class currently in use."""
        return self._filter_class

    @filter_class.setter
    def filter_class(self, value):
        # Imported lazily so pyloudnorm is only needed once a Meter is built.
        from pyloudnorm import Meter

        # Let pyloudnorm build the filter stages for this sample rate and
        # filter class, then reuse them.
        meter = Meter(self.rate)
        meter.filter_class = value
        self._filter_class = value
        self._filters = meter._filters
261
+
262
+
263
class LoudnessMixin:
    """Mixin that adds a cached integrated-loudness measurement.

    The host class is expected to provide ``signal_length``,
    ``signal_duration``, ``sample_rate``, ``device``, ``audio_data``,
    ``zero_pad`` and ``truncate_samples``, all of which ``loudness`` uses.
    """

    # Cached result of the last measurement (``None`` until computed).
    _loudness = None
    # Minimum loudness possible; measurements are floored at this value.
    MIN_LOUDNESS = -70

    def loudness(
        self, filter_class: str = "K-weighting", block_size: float = 0.400, **kwargs
    ):
        """Calculates loudness using an implementation of ITU-R BS.1770-4.
        Allows control over gating block size and frequency weighting filters for
        additional control. Measure the integrated gated loudness of a signal.

        API is derived from PyLoudnorm, but this implementation is ported to PyTorch
        and is tensorized across batches. When on GPU, an FIR approximation of the IIR
        filters is used to compute loudness for speed.

        Uses the weighting filters and block size defined by the meter;
        the integrated loudness is measured based upon the gating algorithm
        defined in the ITU-R BS.1770-4 specification.

        The result is cached on the instance: once a value has been computed,
        later calls return it as-is, whatever arguments they are given.

        Parameters
        ----------
        filter_class : str, optional
            Class of weighting filter used:
            'K-weighting' (default), 'Fenton/Lee 1',
            'Fenton/Lee 2', 'Dash et al.',
            by default "K-weighting"
        block_size : float, optional
            Gating block size in seconds, by default 0.400
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.loudness.Meter`.

        Returns
        -------
        torch.Tensor
            Loudness of audio data.
        """
        if self._loudness is not None:
            return self._loudness.to(self.device)

        original_length = self.signal_length
        # Signals shorter than 0.5 s are temporarily zero-padded up to 0.5 s
        # before being measured.
        if self.signal_duration < 0.5:
            pad_len = int((0.5 - self.signal_duration) * self.sample_rate)
            self.zero_pad(0, pad_len)

        try:
            # create BS.1770 meter
            meter = Meter(
                self.sample_rate,
                filter_class=filter_class,
                block_size=block_size,
                **kwargs,
            )
            meter = meter.to(self.device)
            # measure loudness; the meter gets audio_data with its last two
            # axes swapped
            loudness = meter.integrated_loudness(self.audio_data.permute(0, 2, 1))
        finally:
            # Always undo the temporary padding, even when the measurement
            # fails, so the signal is never left modified.
            self.truncate_samples(original_length)

        min_loudness = torch.full_like(loudness, self.MIN_LOUDNESS)
        self._loudness = torch.maximum(loudness, min_loudness)

        return self._loudness.to(self.device)