xinference-0.14.2-py3-none-any.whl → xinference-0.14.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +1 -1
  3. xinference/core/image_interface.py +9 -0
  4. xinference/core/model.py +4 -1
  5. xinference/core/worker.py +48 -41
  6. xinference/model/audio/chattts.py +24 -9
  7. xinference/model/audio/core.py +8 -2
  8. xinference/model/audio/fish_speech.py +228 -0
  9. xinference/model/audio/model_spec.json +8 -0
  10. xinference/model/embedding/core.py +23 -1
  11. xinference/model/image/model_spec.json +2 -1
  12. xinference/model/image/model_spec_modelscope.json +2 -1
  13. xinference/model/image/stable_diffusion/core.py +49 -1
  14. xinference/model/llm/__init__.py +6 -0
  15. xinference/model/llm/llm_family.json +54 -9
  16. xinference/model/llm/llm_family.py +2 -0
  17. xinference/model/llm/llm_family_modelscope.json +56 -10
  18. xinference/model/llm/lmdeploy/__init__.py +0 -0
  19. xinference/model/llm/lmdeploy/core.py +557 -0
  20. xinference/model/llm/transformers/cogvlm2.py +4 -45
  21. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  22. xinference/model/llm/transformers/core.py +1 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -23
  24. xinference/model/llm/transformers/intern_vl.py +94 -11
  25. xinference/model/llm/transformers/minicpmv25.py +2 -23
  26. xinference/model/llm/transformers/minicpmv26.py +2 -22
  27. xinference/model/llm/transformers/yi_vl.py +2 -24
  28. xinference/model/llm/utils.py +10 -1
  29. xinference/model/llm/vllm/core.py +1 -1
  30. xinference/thirdparty/fish_speech/__init__.py +0 -0
  31. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  32. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  33. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  34. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  35. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  36. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  37. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  38. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  39. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  40. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  41. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  42. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  43. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  44. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  46. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  48. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  49. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  50. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  52. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  56. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  57. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  58. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  59. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  60. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  61. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  62. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  63. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  64. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  67. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  68. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  69. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  70. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  71. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  72. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  73. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  74. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  75. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  76. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  77. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  78. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  79. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  83. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  84. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  85. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  86. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  87. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  88. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  89. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  90. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  91. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  92. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  93. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  94. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  95. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  96. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  97. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  98. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  99. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  100. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  101. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  102. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  103. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  104. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  105. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  106. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  107. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  108. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  109. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  110. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  111. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  112. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  113. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  114. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  115. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  116. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  117. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  118. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  119. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  120. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  121. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  122. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  123. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  124. xinference/web/ui/build/asset-manifest.json +3 -3
  125. xinference/web/ui/build/index.html +1 -1
  126. xinference/web/ui/build/static/js/{main.ffc26121.js → main.661c7b0a.js} +3 -3
  127. xinference/web/ui/build/static/js/main.661c7b0a.js.map +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  129. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/METADATA +18 -6
  130. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/RECORD +135 -37
  131. xinference/web/ui/build/static/js/main.ffc26121.js.map +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  133. /xinference/web/ui/build/static/js/{main.ffc26121.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  134. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  135. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  136. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  137. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,176 @@
1
+ """
2
+ Used to transcribe all audio files in one folder into another folder.
3
+ e.g.
4
+ Directory structure:
5
+ --pre_data_root
6
+ ----SP_1
7
+ ------01.wav
8
+ ------02.wav
9
+ ------......
10
+ ----SP_2
11
+ ------01.wav
12
+ ------02.wav
13
+ ------......
14
+ Use
15
+ python tools/whisper_asr.py --audio-dir pre_data_root/SP_1 --save-dir data/SP_1
16
+ to transcribe the first speaker.
17
+
18
+ Use
19
+ python tools/whisper_asr.py --audio-dir pre_data_root/SP_2 --save-dir data/SP_2
20
+ to transcribe the second speaker.
21
+
22
+ Note: Be aware of your audio sample rate, which defaults to 44.1kHz.
23
+ """
24
+
25
+ import re
26
+ from pathlib import Path
27
+
28
+ import click
29
+ import soundfile as sf
30
+ from faster_whisper import WhisperModel
31
+ from loguru import logger
32
+ from pydub import AudioSegment
33
+ from tqdm import tqdm
34
+
35
+ from tools.file import AUDIO_EXTENSIONS, list_files
36
+
37
+
38
+ @click.command()
39
+ @click.option("--model-size", default="large-v3", help="Size of the Whisper model")
40
+ @click.option(
41
+ "--compute-type",
42
+ default="float16",
43
+ help="Computation Precision of the Whisper model [float16 / int8_float16 / int8]",
44
+ )
45
+ @click.option("--audio-dir", required=True, help="Directory containing audio files")
46
+ @click.option(
47
+ "--save-dir", required=True, help="Directory to save processed audio files"
48
+ )
49
+ @click.option(
50
+ "--sample-rate",
51
+ default=44100,
52
+ type=int,
53
+ help="Output sample rate, default to input sample rate",
54
+ )
55
+ @click.option("--device", default="cuda", help="Device to use [cuda / cpu]")
56
+ @click.option("--language", default="auto", help="Language of the transcription")
57
+ @click.option("--initial-prompt", default=None, help="Initial prompt for transcribing")
58
+ def main(
59
+ model_size,
60
+ compute_type,
61
+ audio_dir,
62
+ save_dir,
63
+ sample_rate,
64
+ device,
65
+ language,
66
+ initial_prompt,
67
+ ):
68
+ logger.info("Loading / Downloading Faster Whisper model...")
69
+
70
+ model = WhisperModel(
71
+ model_size,
72
+ device=device,
73
+ compute_type=compute_type,
74
+ download_root="faster_whisper",
75
+ )
76
+
77
+ logger.info("Model loaded.")
78
+
79
+ save_path = Path(save_dir)
80
+ save_path.mkdir(parents=True, exist_ok=True)
81
+
82
+ audio_files = list_files(
83
+ path=audio_dir, extensions=AUDIO_EXTENSIONS, recursive=True
84
+ )
85
+
86
+ for file_path in tqdm(audio_files, desc="Processing audio file"):
87
+ file_stem = file_path.stem
88
+ file_suffix = file_path.suffix
89
+
90
+ rel_path = Path(file_path).relative_to(audio_dir)
91
+ (save_path / rel_path.parent).mkdir(parents=True, exist_ok=True)
92
+
93
+ audio = AudioSegment.from_file(file_path)
94
+
95
+ segments, info = model.transcribe(
96
+ file_path,
97
+ beam_size=5,
98
+ language=None if language == "auto" else language,
99
+ initial_prompt=initial_prompt,
100
+ )
101
+
102
+ print(
103
+ "Detected language '%s' with probability %f"
104
+ % (info.language, info.language_probability)
105
+ )
106
+ print("Total len(ms): ", len(audio))
107
+
108
+ whole_text = None
109
+ for segment in segments:
110
+ id, start, end, text = (
111
+ segment.id,
112
+ segment.start,
113
+ segment.end,
114
+ segment.text,
115
+ )
116
+ print("Segment %03d [%.2fs -> %.2fs] %s" % (id, start, end, text))
117
+ if not whole_text:
118
+ whole_text = text
119
+ else:
120
+ whole_text += ", " + text
121
+
122
+ whole_text += "."
123
+
124
+ audio_save_path = save_path / rel_path.parent / f"{file_stem}{file_suffix}"
125
+ audio.export(audio_save_path, format=file_suffix[1:])
126
+ print(f"Exported {audio_save_path}")
127
+
128
+ transcript_save_path = save_path / rel_path.parent / f"{file_stem}.lab"
129
+ with open(
130
+ transcript_save_path,
131
+ "w",
132
+ encoding="utf-8",
133
+ ) as f:
134
+ f.write(whole_text)
135
+
136
+
137
+ if __name__ == "__main__":
138
+ main()
139
+ exit(0)
140
+
141
+ audio = AudioSegment.from_wav(
142
+ r"D:\PythonProject\原神语音中文\胡桃\vo_hutao_draw_appear.wav"
143
+ )
144
+
145
+ model_size = "large-v3"
146
+
147
+ model = WhisperModel(
148
+ model_size,
149
+ device="cuda",
150
+ compute_type="float16",
151
+ download_root="faster_whisper",
152
+ )
153
+
154
+ segments, info = model.transcribe(
155
+ r"D:\PythonProject\原神语音中文\胡桃\vo_hutao_draw_appear.wav",
156
+ beam_size=5,
157
+ )
158
+
159
+ print(
160
+ "Detected language '%s' with probability %f"
161
+ % (info.language, info.language_probability)
162
+ )
163
+ print("Total len(ms): ", len(audio))
164
+
165
+ for i, segment in enumerate(segments):
166
+ print(
167
+ "Segment %03d [%.2fs -> %.2fs] %s"
168
+ % (i, segment.start, segment.end, segment.text)
169
+ )
170
+ start_ms = int(segment.start * 1000)
171
+ end_ms = int(segment.end * 1000)
172
+ segment_audio = audio[start_ms:end_ms]
173
+ segment_audio.export(f"segment_{i:03d}.wav", format="wav")
174
+ print(f"Exported segment_{i:03d}.wav")
175
+
176
+ print("All segments have been exported.")
@@ -1,14 +1,14 @@
1
1
  {
2
2
  "files": {
3
3
  "main.css": "./static/css/main.4bafd904.css",
4
- "main.js": "./static/js/main.ffc26121.js",
4
+ "main.js": "./static/js/main.661c7b0a.js",
5
5
  "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
6
6
  "index.html": "./index.html",
7
7
  "main.4bafd904.css.map": "./static/css/main.4bafd904.css.map",
8
- "main.ffc26121.js.map": "./static/js/main.ffc26121.js.map"
8
+ "main.661c7b0a.js.map": "./static/js/main.661c7b0a.js.map"
9
9
  },
10
10
  "entrypoints": [
11
11
  "static/css/main.4bafd904.css",
12
- "static/js/main.ffc26121.js"
12
+ "static/js/main.661c7b0a.js"
13
13
  ]
14
14
  }
@@ -1 +1 @@
1
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ffc26121.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
1
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.661c7b0a.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>