xinference 1.10.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (317) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +11 -28
  3. xinference/client/restful/async_restful_client.py +20 -3
  4. xinference/client/restful/restful_client.py +20 -3
  5. xinference/core/supervisor.py +87 -53
  6. xinference/core/worker.py +10 -0
  7. xinference/deploy/cmdline.py +15 -0
  8. xinference/model/audio/core.py +21 -6
  9. xinference/model/audio/indextts2.py +166 -0
  10. xinference/model/audio/model_spec.json +38 -1
  11. xinference/model/image/model_spec.json +69 -0
  12. xinference/model/image/stable_diffusion/core.py +13 -4
  13. xinference/model/llm/__init__.py +4 -0
  14. xinference/model/llm/llm_family.json +464 -2
  15. xinference/model/llm/sglang/core.py +30 -11
  16. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
  17. xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
  18. xinference/model/llm/utils.py +12 -9
  19. xinference/model/llm/vllm/core.py +93 -17
  20. xinference/thirdparty/audiotools/__init__.py +10 -0
  21. xinference/thirdparty/audiotools/core/__init__.py +4 -0
  22. xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
  23. xinference/thirdparty/audiotools/core/display.py +194 -0
  24. xinference/thirdparty/audiotools/core/dsp.py +390 -0
  25. xinference/thirdparty/audiotools/core/effects.py +647 -0
  26. xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
  27. xinference/thirdparty/audiotools/core/loudness.py +320 -0
  28. xinference/thirdparty/audiotools/core/playback.py +252 -0
  29. xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
  30. xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
  31. xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
  32. xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
  33. xinference/thirdparty/audiotools/core/util.py +671 -0
  34. xinference/thirdparty/audiotools/core/whisper.py +97 -0
  35. xinference/thirdparty/audiotools/data/__init__.py +3 -0
  36. xinference/thirdparty/audiotools/data/datasets.py +517 -0
  37. xinference/thirdparty/audiotools/data/preprocess.py +81 -0
  38. xinference/thirdparty/audiotools/data/transforms.py +1592 -0
  39. xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
  40. xinference/thirdparty/audiotools/metrics/distance.py +131 -0
  41. xinference/thirdparty/audiotools/metrics/quality.py +159 -0
  42. xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
  43. xinference/thirdparty/audiotools/ml/__init__.py +5 -0
  44. xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
  45. xinference/thirdparty/audiotools/ml/decorators.py +440 -0
  46. xinference/thirdparty/audiotools/ml/experiment.py +90 -0
  47. xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
  48. xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
  49. xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
  50. xinference/thirdparty/audiotools/post.py +140 -0
  51. xinference/thirdparty/audiotools/preference.py +600 -0
  52. xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
  53. xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
  54. xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
  55. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
  56. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
  57. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
  58. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
  59. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  60. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
  61. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
  62. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
  63. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
  64. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
  65. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
  66. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
  67. xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
  68. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
  69. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
  70. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
  71. xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
  72. xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
  73. xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
  74. xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
  75. xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
  76. xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
  77. xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
  78. xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
  79. xinference/thirdparty/indextts/__init__.py +0 -0
  80. xinference/thirdparty/indextts/cli.py +65 -0
  81. xinference/thirdparty/indextts/gpt/__init__.py +0 -0
  82. xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
  83. xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
  84. xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
  85. xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
  86. xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
  87. xinference/thirdparty/indextts/gpt/model.py +713 -0
  88. xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
  89. xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
  90. xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
  91. xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
  92. xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
  93. xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
  94. xinference/thirdparty/indextts/infer.py +690 -0
  95. xinference/thirdparty/indextts/infer_v2.py +739 -0
  96. xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
  97. xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
  98. xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
  99. xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
  100. xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
  101. xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
  102. xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
  103. xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
  104. xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
  105. xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
  106. xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
  107. xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
  108. xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
  109. xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
  110. xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
  111. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
  112. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
  113. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
  114. xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
  115. xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
  116. xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
  117. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
  118. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
  119. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
  120. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
  121. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
  122. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
  123. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
  124. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
  125. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
  126. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
  127. xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
  128. xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
  129. xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
  130. xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
  131. xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
  132. xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
  133. xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
  134. xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
  135. xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
  136. xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
  137. xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
  138. xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
  139. xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
  140. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
  141. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
  142. xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
  143. xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
  144. xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
  145. xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
  146. xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
  147. xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
  148. xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
  149. xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
  150. xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
  151. xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
  152. xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
  153. xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
  154. xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
  155. xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
  156. xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
  157. xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
  158. xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
  159. xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
  160. xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
  161. xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
  162. xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
  163. xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
  164. xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
  165. xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
  166. xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
  167. xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
  168. xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
  169. xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
  170. xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
  171. xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
  172. xinference/thirdparty/indextts/utils/__init__.py +0 -0
  173. xinference/thirdparty/indextts/utils/arch_util.py +120 -0
  174. xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
  175. xinference/thirdparty/indextts/utils/common.py +121 -0
  176. xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
  177. xinference/thirdparty/indextts/utils/front.py +536 -0
  178. xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
  179. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
  180. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
  181. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
  182. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
  183. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
  184. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
  185. xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
  186. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
  187. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
  188. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
  189. xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
  190. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
  191. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
  192. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
  193. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
  194. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
  195. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
  196. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
  197. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
  198. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
  199. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
  200. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
  201. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
  202. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
  203. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
  204. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
  205. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
  206. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
  207. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
  208. xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
  209. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
  210. xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
  211. xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
  212. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
  213. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
  214. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
  215. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
  216. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
  217. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
  218. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
  219. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
  220. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
  221. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
  222. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
  223. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
  224. xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
  225. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
  226. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
  227. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
  228. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
  229. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
  230. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
  231. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
  232. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
  233. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
  234. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
  235. xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
  236. xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
  237. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
  238. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
  239. xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
  240. xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
  241. xinference/thirdparty/indextts/utils/text_utils.py +41 -0
  242. xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
  243. xinference/thirdparty/indextts/utils/utils.py +93 -0
  244. xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
  245. xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
  246. xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
  247. xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
  248. xinference/ui/gradio/media_interface.py +66 -8
  249. xinference/ui/web/ui/build/asset-manifest.json +6 -6
  250. xinference/ui/web/ui/build/index.html +1 -1
  251. xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
  252. xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
  253. xinference/ui/web/ui/build/static/js/main.d192c4f3.js +3 -0
  254. xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.d192c4f3.js.LICENSE.txt} +0 -7
  255. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +1 -0
  256. xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
  257. xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
  258. xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
  259. xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
  260. xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
  261. xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
  262. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
  263. xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
  264. xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
  265. xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
  266. xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
  267. xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
  268. xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
  269. xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
  270. xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
  271. xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
  272. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +1 -0
  273. xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
  274. xinference/ui/web/ui/package-lock.json +0 -34
  275. xinference/ui/web/ui/package.json +0 -1
  276. xinference/ui/web/ui/src/locales/en.json +9 -3
  277. xinference/ui/web/ui/src/locales/ja.json +9 -3
  278. xinference/ui/web/ui/src/locales/ko.json +9 -3
  279. xinference/ui/web/ui/src/locales/zh.json +9 -3
  280. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/METADATA +18 -2
  281. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/RECORD +285 -67
  282. xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
  283. xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
  284. xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
  285. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
  286. xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
  287. xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
  288. xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
  289. xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
  290. xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
  291. xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
  292. xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
  293. xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
  294. xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
  295. xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
  296. xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
  297. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
  298. xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
  299. xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
  300. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
  301. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
  302. xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
  303. xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
  304. xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
  305. xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
  306. xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
  307. xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
  308. xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
  309. xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
  310. xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
  311. xinference/ui/web/ui/node_modules/select/bower.json +0 -13
  312. xinference/ui/web/ui/node_modules/select/package.json +0 -29
  313. xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
  314. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/WHEEL +0 -0
  315. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/entry_points.txt +0 -0
  316. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/licenses/LICENSE +0 -0
  317. {xinference-1.10.0.dist-info → xinference-1.10.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,194 @@
1
+ import inspect
2
+ import typing
3
+ from functools import wraps
4
+
5
+ from . import util
6
+
7
+
8
def format_figure(func):
    """Decorator for formatting figures produced by the code below.
    See :py:func:`audiotools.core.util.format_figure` for more.

    Every keyword argument whose name matches a parameter of
    ``util.format_figure`` is removed from the call to ``func`` and passed
    to ``util.format_figure`` instead, which runs after ``func`` returns.

    Parameters
    ----------
    func : Callable
        Plotting function that is decorated by this function.

    Returns
    -------
    Callable
        Wrapper that calls ``func``, then ``util.format_figure``, and
        returns whatever ``func`` returned.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Looked up on every call so the current signature of
        # ``util.format_figure`` is always the one that is honoured.
        f_keys = inspect.signature(util.format_figure).parameters.keys()
        # Split the formatting options off so ``func`` never receives them.
        f_kwargs = {k: kwargs.pop(k) for k in list(kwargs) if k in f_keys}
        result = func(*args, **kwargs)
        util.format_figure(**f_kwargs)
        # Propagate the wrapped function's result instead of dropping it.
        return result

    return wrapper
31
+
32
+
33
class DisplayMixin:
    """Plotting and image-export helpers for an audio signal object.

    The methods below read ``self.audio_data`` and ``self.sample_rate`` and
    call ``self.clone()``; the class this is mixed into must provide them.
    """

    @format_figure
    def specshow(
        self,
        preemphasis: bool = False,
        x_axis: str = "time",
        y_axis: str = "linear",
        n_mels: int = 128,
        **kwargs,
    ):
        """Displays a spectrogram, using ``librosa.display.specshow``.

        Parameters
        ----------
        preemphasis : bool, optional
            Whether or not to apply preemphasis, which makes high
            frequency detail easier to see, by default False
        x_axis : str, optional
            How to label the x axis, by default "time"
        y_axis : str, optional
            How to label the y axis, by default "linear"
        n_mels : int, optional
            If displaying a mel spectrogram with ``y_axis = "mel"``,
            this controls the number of mels, by default 128.
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.util.format_figure`.
            Any keyword that the ``format_figure`` decorator does not
            consume is forwarded to ``librosa.display.specshow``.
        """
        import librosa
        import librosa.display

        # Always re-compute the STFT data before showing it, in case
        # it changed. Work on a clone so ``self`` is left untouched.
        signal = self.clone()
        signal.stft_data = None

        if preemphasis:
            signal.preemphasis()

        ref = signal.magnitude.max()
        log_mag = signal.log_magnitude(ref_value=ref)

        if y_axis == "mel":
            # Clamp before the log so silent bins do not become -inf, then
            # shift so that the loudest bin sits at 0.
            log_mag = 20 * signal.mel_spectrogram(n_mels).clamp(1e-5).log10()
            log_mag -= log_mag.max()

        # ``Tensor.numpy()`` only works on CPU tensors that do not require
        # grad, so detach and move to CPU first (as ``waveplot`` and
        # ``write_audio_to_tb`` already do for the audio data).
        # Only the first batch item is shown, averaged over axis 0
        # (presumably the channel axis -- TODO confirm).
        librosa.display.specshow(
            log_mag.detach().cpu().numpy()[0].mean(axis=0),
            x_axis=x_axis,
            y_axis=y_axis,
            sr=signal.sample_rate,
            **kwargs,
        )

    @format_figure
    def waveplot(self, x_axis: str = "time", **kwargs):
        """Displays a waveform plot, using ``librosa.display.waveshow``.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.util.format_figure`.
            Any keyword that the ``format_figure`` decorator does not
            consume is forwarded to the librosa plotting function.
        """
        import librosa
        import librosa.display

        # First batch item, averaged over dim 0 (presumably channels --
        # TODO confirm).
        audio_data = self.audio_data[0].mean(dim=0)
        audio_data = audio_data.cpu().numpy()

        # Use ``waveshow`` when this librosa provides it, otherwise fall
        # back to ``waveplot``.
        plot_fn = "waveshow" if hasattr(librosa.display, "waveshow") else "waveplot"
        wave_plot_fn = getattr(librosa.display, plot_fn)
        wave_plot_fn(audio_data, x_axis=x_axis, sr=self.sample_rate, **kwargs)

    @format_figure
    def wavespec(self, x_axis: str = "time", **kwargs):
        """Displays a waveform plot stacked above a spectrogram.

        The figure is split into six rows: the waveform
        (:py:func:`audiotools.core.display.DisplayMixin.waveplot`) takes the
        first row and the spectrogram
        (:py:func:`audiotools.core.display.DisplayMixin.specshow`) takes the
        remaining five.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow`.
        """
        import matplotlib.pyplot as plt
        from matplotlib.gridspec import GridSpec

        gs = GridSpec(6, 1)
        plt.subplot(gs[0, :])
        self.waveplot(x_axis=x_axis)
        plt.subplot(gs[1:, :])
        self.specshow(x_axis=x_axis, **kwargs)

    def write_audio_to_tb(
        self,
        tag: str,
        writer,
        step: typing.Optional[int] = None,
        plot_fn: typing.Union[typing.Callable, str] = "specshow",
        **kwargs,
    ):
        """Writes a signal and its spectrogram to Tensorboard. Will show up
        under the Audio and Images tab in Tensorboard.

        Parameters
        ----------
        tag : str
            Tag to write signal to (e.g. ``clean/sample_0.wav``). The image will be
            written to the corresponding ``.png`` file (e.g. ``clean/sample_0.png``).
        writer : SummaryWriter
            A SummaryWriter object from PyTorch library.
        step : int, optional
            The step to write the signal to, by default None
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image. Set to ``None`` to avoid plotting, by default "specshow"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        # Only the first channel of the first batch item is written as audio.
        audio_data = self.audio_data[0, 0].detach().cpu()
        sample_rate = self.sample_rate
        writer.add_audio(tag, audio_data, step, sample_rate)

        if plot_fn is not None:
            if isinstance(plot_fn, str):
                plot_fn = getattr(self, plot_fn)
            fig = plt.figure()
            plt.clf()
            plot_fn(**kwargs)

            if tag.endswith(".wav"):
                # Swap only the extension; a plain ``str.replace`` would
                # also rewrite "wav" inside directory or file names
                # (e.g. "wav/a.wav" or "waveform/a.wav").
                image_tag = tag[: -len("wav")] + "png"
            else:
                # Tags without a ``.wav`` extension keep the legacy mapping.
                image_tag = tag.replace("wav", "png")
            writer.add_figure(image_tag, fig, step)

    def save_image(
        self,
        image_path: str,
        plot_fn: typing.Union[typing.Callable, str] = "specshow",
        **kwargs,
    ):
        """Save AudioSignal spectrogram (or whatever ``plot_fn`` is set to) to
        a specified file.

        The current matplotlib figure is cleared before plotting and closed
        after the file has been written.

        Parameters
        ----------
        image_path : str
            Where to save the file to.
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image, by default "specshow". Set to ``None``
            to avoid plotting; the cleared figure is then saved as is.
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        if isinstance(plot_fn, str):
            plot_fn = getattr(self, plot_fn)

        plt.clf()
        # ``None`` is a documented value; calling it would raise TypeError.
        if plot_fn is not None:
            plot_fn(**kwargs)
        plt.savefig(image_path, bbox_inches="tight", pad_inches=0)
        plt.close()
@@ -0,0 +1,390 @@
1
+ import typing
2
+
3
+ import julius
4
+ import numpy as np
5
+ import torch
6
+
7
+ from . import util
8
+
9
+
10
class DSPMixin:
    """Mixin with windowing, filtering and STFT-masking operations.

    The methods operate in-place on the host object and return ``self``
    (``windows`` is a generator instead). The host class must provide the
    members used below: ``audio_data``, ``sample_rate``, ``batch_size``,
    ``num_channels``, ``signal_length``, ``signal_duration``, ``device``,
    ``stft_data``, ``magnitude``, ``phase``, ``log_magnitude()``,
    ``zero_pad()``, ``trim()`` and indexing via ``self[...]``.
    """

    # State recorded by ``_preprocess_signal_for_windowing`` and read back by
    # ``overlap_and_add`` to restore the original layout.
    _original_batch_size = None
    _original_num_channels = None
    _padded_signal_length = None

    def _window_and_hop_lengths(self, window_duration, hop_duration):
        """Convert window/hop durations in seconds to lengths in samples.

        Used when no preprocessing is requested, so the lengths are not
        adjusted and the signal is not padded.

        Raises
        ------
        ValueError
            If the hop is shorter than one sample, which would make the
            windowing loop never advance.
        """
        window_length = int(window_duration * self.sample_rate)
        hop_length = int(hop_duration * self.sample_rate)
        if hop_length < 1:
            raise ValueError(
                "hop_duration must correspond to at least one sample, "
                f"got hop_length={hop_length}"
            )
        return window_length, hop_length

    def _preprocess_signal_for_windowing(self, window_duration, hop_duration):
        """Pad the signal for windowing and return the lengths in samples.

        Records the current batch size and channel count, rounds the window
        length down to a multiple of the hop length when it is not already
        one, calls ``zero_pad(hop_length, hop_length)`` and records the
        resulting signal length.

        Returns
        -------
        tuple
            ``(window_length, hop_length)`` in samples.
        """
        self._original_batch_size = self.batch_size
        self._original_num_channels = self.num_channels

        window_length = int(window_duration * self.sample_rate)
        hop_length = int(hop_duration * self.sample_rate)

        if window_length % hop_length != 0:
            # Shrink the window to the largest multiple of the hop.
            factor = window_length // hop_length
            window_length = factor * hop_length

        self.zero_pad(hop_length, hop_length)
        self._padded_signal_length = self.signal_length

        return window_length, hop_length

    def windows(
        self, window_duration: float, hop_duration: float, preprocess: bool = True
    ):
        """Generator which yields windows of specified duration from signal with a specified
        hop length.

        Note that ``audio_data`` is reshaped in-place to
        ``(-1, 1, signal_length)`` before iteration starts.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True. When False the
            durations are converted to samples as-is and no padding is added.

        Yields
        ------
        AudioSignal
            Each window, obtained as ``self[b, ..., start:end]``.
        """
        if preprocess:
            window_length, hop_length = self._preprocess_signal_for_windowing(
                window_duration, hop_duration
            )
        else:
            # Previously these names were left undefined on this path.
            window_length, hop_length = self._window_and_hop_lengths(
                window_duration, hop_duration
            )

        self.audio_data = self.audio_data.reshape(-1, 1, self.signal_length)

        for b in range(self.batch_size):
            start_idx = 0
            # Only full windows are yielded; a trailing partial one is dropped.
            while start_idx + window_length <= self.signal_length:
                yield self[b, ..., start_idx : start_idx + window_length]
                start_idx += hop_length

    def collect_windows(
        self, window_duration: float, hop_duration: float, preprocess: bool = True
    ):
        """Reshapes signal into windows of specified duration from signal with a specified
        hop length. Window are placed along the batch dimension. Use with
        :py:func:`audiotools.core.dsp.DSPMixin.overlap_and_add` to reconstruct the
        original signal.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True. When False the
            durations are converted to samples as-is and no padding is added;
            ``overlap_and_add`` relies on the state recorded by preprocessing.

        Returns
        -------
        AudioSignal
            AudioSignal unfolded with shape ``(nb * nch * num_windows, 1, window_length)``
        """
        if preprocess:
            window_length, hop_length = self._preprocess_signal_for_windowing(
                window_duration, hop_duration
            )
        else:
            # Previously these names were left undefined on this path.
            window_length, hop_length = self._window_and_hop_lengths(
                window_duration, hop_duration
            )

        # self.audio_data: (nb, nch, nt).
        unfolded = torch.nn.functional.unfold(
            self.audio_data.reshape(-1, 1, 1, self.signal_length),
            kernel_size=(1, window_length),
            stride=(1, hop_length),
        )
        # unfolded: (nb * nch, window_length, num_windows).
        # -> (nb * nch * num_windows, 1, window_length)
        unfolded = unfolded.permute(0, 2, 1).reshape(-1, 1, window_length)
        self.audio_data = unfolded
        return self

    def overlap_and_add(self, hop_duration: float):
        """Function which takes a list of windows and overlap adds them into a
        signal the same length as ``audio_signal``.

        Relies on the batch size, channel count and padded length recorded by
        a preceding ``collect_windows`` / ``windows`` call with
        ``preprocess=True``.

        Parameters
        ----------
        hop_duration : float
            How much to shift for each window
            (overlap is window_duration - hop_duration) in seconds.

        Returns
        -------
        AudioSignal
            overlap-and-added signal.
        """
        hop_length = int(hop_duration * self.sample_rate)
        window_length = self.signal_length

        nb, nch = self._original_batch_size, self._original_num_channels

        # (nb * nch * num_windows, 1, window_length)
        # -> (nb * nch, window_length, num_windows), the layout ``fold`` expects.
        unfolded = self.audio_data.reshape(nb * nch, -1, window_length).permute(0, 2, 1)
        fold_kwargs = dict(
            output_size=(1, self._padded_signal_length),
            kernel_size=(1, window_length),
            stride=(1, hop_length),
        )
        folded = torch.nn.functional.fold(unfolded, **fold_kwargs)

        # Folding a tensor of ones counts how many windows cover each sample;
        # dividing by it turns the overlap sum into an average.
        norm = torch.nn.functional.fold(torch.ones_like(unfolded), **fold_kwargs)
        folded = folded / norm

        folded = folded.reshape(nb, nch, -1)
        self.audio_data = folded
        # Remove the padding added by ``_preprocess_signal_for_windowing``.
        self.trim(hop_length, hop_length)
        return self

    def _apply_julius_filter(self, filter_cls, cutoffs, zeros):
        """Apply a per-item ``julius`` filter of type ``filter_cls`` in-place.

        Shared implementation of ``low_pass`` and ``high_pass``.
        """
        cutoffs = util.ensure_tensor(cutoffs, 2, self.batch_size)
        # The filter is given the cutoff as a fraction of the sample rate.
        cutoffs = cutoffs / self.sample_rate
        filtered = torch.empty_like(self.audio_data)

        for i, cutoff in enumerate(cutoffs):
            item_filter = filter_cls(cutoff.cpu(), zeros=zeros).to(self.device)
            filtered[i] = item_filter(self.audio_data[i])

        self.audio_data = filtered
        # Reset the stored STFT, presumably so a stale one is not reused.
        self.stft_data = None
        return self

    def low_pass(
        self, cutoffs: typing.Union[torch.Tensor, np.ndarray, float], zeros: int = 51
    ):
        """Low-passes the signal in-place. Each item in the batch
        can have a different low-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same low-pass filter.

        Parameters
        ----------
        cutoffs : typing.Union[torch.Tensor, np.ndarray, float]
            Cutoff in Hz of low-pass filter.
        zeros : int, optional
            Number of taps to use in low-pass filter, by default 51

        Returns
        -------
        AudioSignal
            Low-passed AudioSignal.
        """
        return self._apply_julius_filter(julius.LowPassFilter, cutoffs, zeros)

    def high_pass(
        self, cutoffs: typing.Union[torch.Tensor, np.ndarray, float], zeros: int = 51
    ):
        """High-passes the signal in-place. Each item in the batch
        can have a different high-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same high-pass filter.

        Parameters
        ----------
        cutoffs : typing.Union[torch.Tensor, np.ndarray, float]
            Cutoff in Hz of high-pass filter.
        zeros : int, optional
            Number of taps to use in high-pass filter, by default 51

        Returns
        -------
        AudioSignal
            High-passed AudioSignal.
        """
        return self._apply_julius_filter(julius.HighPassFilter, cutoffs, zeros)

    def mask_frequencies(
        self,
        fmin_hz: typing.Union[torch.Tensor, np.ndarray, float],
        fmax_hz: typing.Union[torch.Tensor, np.ndarray, float],
        val: float = 0.0,
    ):
        """Masks frequencies between ``fmin_hz`` and ``fmax_hz``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.

        Bins with ``fmin_hz <= f < fmax_hz`` are masked, in both the magnitude
        and the phase.

        Parameters
        ----------
        fmin_hz : typing.Union[torch.Tensor, np.ndarray, float]
            Lower end of band to mask out.
        fmax_hz : typing.Union[torch.Tensor, np.ndarray, float]
            Upper end of band to mask out.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        # SpecAug
        mag, phase = self.magnitude, self.phase
        # Move the thresholds next to the frequency bins before comparing, so
        # host-side inputs also work when the signal lives on another device.
        fmin_hz = util.ensure_tensor(fmin_hz, ndim=mag.ndim).to(self.device)
        fmax_hz = util.ensure_tensor(fmax_hz, ndim=mag.ndim).to(self.device)
        assert torch.all(fmin_hz < fmax_hz)

        # build mask
        nbins = mag.shape[-2]
        bins_hz = torch.linspace(0, self.sample_rate / 2, nbins, device=self.device)
        bins_hz = bins_hz[None, None, :, None].repeat(
            self.batch_size, 1, 1, mag.shape[-1]
        )
        mask = (fmin_hz <= bins_hz) & (bins_hz < fmax_hz)
        mask = mask.to(self.device)

        mag = mag.masked_fill(mask, val)
        phase = phase.masked_fill(mask, val)
        self.stft_data = mag * torch.exp(1j * phase)
        return self

    def mask_timesteps(
        self,
        tmin_s: typing.Union[torch.Tensor, np.ndarray, float],
        tmax_s: typing.Union[torch.Tensor, np.ndarray, float],
        val: float = 0.0,
    ):
        """Masks timesteps between ``tmin_s`` and ``tmax_s``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.

        Frames with ``tmin_s <= t < tmax_s`` are masked, in both the magnitude
        and the phase.

        Parameters
        ----------
        tmin_s : typing.Union[torch.Tensor, np.ndarray, float]
            Lower end of timesteps to mask out.
        tmax_s : typing.Union[torch.Tensor, np.ndarray, float]
            Upper end of timesteps to mask out.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        # SpecAug
        mag, phase = self.magnitude, self.phase
        # Move the thresholds next to the time bins before comparing, so
        # host-side inputs also work when the signal lives on another device.
        tmin_s = util.ensure_tensor(tmin_s, ndim=mag.ndim).to(self.device)
        tmax_s = util.ensure_tensor(tmax_s, ndim=mag.ndim).to(self.device)

        assert torch.all(tmin_s < tmax_s)

        # build mask
        nt = mag.shape[-1]
        bins_t = torch.linspace(0, self.signal_duration, nt, device=self.device)
        bins_t = bins_t[None, None, None, :].repeat(
            self.batch_size, 1, mag.shape[-2], 1
        )
        mask = (tmin_s <= bins_t) & (bins_t < tmax_s)

        mag = mag.masked_fill(mask, val)
        phase = phase.masked_fill(mask, val)
        self.stft_data = mag * torch.exp(1j * phase)
        return self

    def mask_low_magnitudes(
        self, db_cutoff: typing.Union[torch.Tensor, np.ndarray, float], val: float = 0.0
    ):
        """Mask away magnitudes below a specified threshold, which
        can be different for every item in the batch.

        Parameters
        ----------
        db_cutoff : typing.Union[torch.Tensor, np.ndarray, float]
            Decibel value for which things below it will be masked away.
        val : float, optional
            Value to fill in for masked portions, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        mag = self.magnitude
        log_mag = self.log_magnitude()

        db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
        # The threshold is compared against the log-magnitude, while the
        # fill is applied to the linear magnitude.
        mask = log_mag < db_cutoff
        mag = mag.masked_fill(mask, val)

        self.magnitude = mag
        return self

    def shift_phase(self, shift: typing.Union[torch.Tensor, np.ndarray, float]):
        """Shifts the phase by a constant value.

        Parameters
        ----------
        shift : typing.Union[torch.Tensor, np.ndarray, float]
            What to shift the phase by.

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        shift = util.ensure_tensor(shift, ndim=self.phase.ndim)
        self.phase = self.phase + shift
        return self

    def corrupt_phase(self, scale: typing.Union[torch.Tensor, np.ndarray, float]):
        """Corrupts the phase randomly by some scaled value.

        Parameters
        ----------
        scale : typing.Union[torch.Tensor, np.ndarray, float]
            Standard deviation of noise to add to the phase.

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        scale = util.ensure_tensor(scale, ndim=self.phase.ndim)
        # ``randn_like`` draws standard-normal noise, so ``scale`` acts as
        # the standard deviation of the added perturbation.
        self.phase = self.phase + scale * torch.randn_like(self.phase)
        return self

    def preemphasis(self, coef: float = 0.85):
        """Applies pre-emphasis to audio signal.

        The signal is filtered with the kernel ``[1, -coef, 0]`` using
        ``conv1d`` with one sample of padding, i.e. each output sample is
        ``x[t - 1] - coef * x[t]`` (with ``x[-1]`` taken as zero).

        Parameters
        ----------
        coef : float, optional
            How much pre-emphasis to apply, lower values do less,
            by default 0.85

        Returns
        -------
        AudioSignal
            Pre-emphasized signal.
        """
        x = self.audio_data.reshape(-1, 1, self.signal_length)
        # Build the kernel in the signal's dtype and on its device so that
        # conv1d also accepts non-float32 audio and an integer ``coef``.
        kernel = torch.tensor([1, -coef, 0], dtype=x.dtype, device=x.device)
        kernel = kernel.view(1, 1, -1)
        x = torch.nn.functional.conv1d(x, kernel, padding=1)
        self.audio_data = x.reshape(*self.audio_data.shape)
        return self