xinference 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (137) hide show
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +1 -1
  3. xinference/core/image_interface.py +9 -0
  4. xinference/core/model.py +4 -1
  5. xinference/core/worker.py +48 -41
  6. xinference/model/audio/chattts.py +24 -9
  7. xinference/model/audio/core.py +8 -2
  8. xinference/model/audio/fish_speech.py +228 -0
  9. xinference/model/audio/model_spec.json +8 -0
  10. xinference/model/embedding/core.py +23 -1
  11. xinference/model/image/model_spec.json +2 -1
  12. xinference/model/image/model_spec_modelscope.json +2 -1
  13. xinference/model/image/stable_diffusion/core.py +49 -1
  14. xinference/model/llm/__init__.py +6 -0
  15. xinference/model/llm/llm_family.json +54 -9
  16. xinference/model/llm/llm_family.py +2 -0
  17. xinference/model/llm/llm_family_modelscope.json +56 -10
  18. xinference/model/llm/lmdeploy/__init__.py +0 -0
  19. xinference/model/llm/lmdeploy/core.py +557 -0
  20. xinference/model/llm/transformers/cogvlm2.py +4 -45
  21. xinference/model/llm/transformers/cogvlm2_video.py +524 -0
  22. xinference/model/llm/transformers/core.py +1 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -23
  24. xinference/model/llm/transformers/intern_vl.py +94 -11
  25. xinference/model/llm/transformers/minicpmv25.py +2 -23
  26. xinference/model/llm/transformers/minicpmv26.py +2 -22
  27. xinference/model/llm/transformers/yi_vl.py +2 -24
  28. xinference/model/llm/utils.py +10 -1
  29. xinference/model/llm/vllm/core.py +1 -1
  30. xinference/thirdparty/fish_speech/__init__.py +0 -0
  31. xinference/thirdparty/fish_speech/fish_speech/__init__.py +0 -0
  32. xinference/thirdparty/fish_speech/fish_speech/callbacks/__init__.py +3 -0
  33. xinference/thirdparty/fish_speech/fish_speech/callbacks/grad_norm.py +113 -0
  34. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  35. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  36. xinference/thirdparty/fish_speech/fish_speech/conversation.py +2 -0
  37. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  38. xinference/thirdparty/fish_speech/fish_speech/datasets/concat_repeat.py +53 -0
  39. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  40. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_pb2.py +33 -0
  41. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text_data_stream.py +36 -0
  42. xinference/thirdparty/fish_speech/fish_speech/datasets/semantic.py +496 -0
  43. xinference/thirdparty/fish_speech/fish_speech/datasets/vqgan.py +147 -0
  44. xinference/thirdparty/fish_speech/fish_speech/i18n/__init__.py +3 -0
  45. xinference/thirdparty/fish_speech/fish_speech/i18n/core.py +40 -0
  46. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  47. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +122 -0
  48. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +122 -0
  49. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +123 -0
  50. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +133 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +122 -0
  52. xinference/thirdparty/fish_speech/fish_speech/i18n/scan.py +122 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lit_module.py +202 -0
  56. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +779 -0
  57. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/lora.py +92 -0
  58. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +3 -0
  59. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +442 -0
  60. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  61. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +44 -0
  62. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +625 -0
  63. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +139 -0
  64. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +115 -0
  65. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +225 -0
  66. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/utils.py +94 -0
  67. xinference/thirdparty/fish_speech/fish_speech/scheduler.py +40 -0
  68. xinference/thirdparty/fish_speech/fish_speech/text/__init__.py +4 -0
  69. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/__init__.py +0 -0
  70. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_class.py +172 -0
  71. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_constant.py +30 -0
  72. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/basic_util.py +342 -0
  73. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/cardinal.py +32 -0
  74. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/date.py +75 -0
  75. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/digit.py +32 -0
  76. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/fraction.py +35 -0
  77. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/money.py +43 -0
  78. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/percentage.py +33 -0
  79. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/telephone.py +51 -0
  80. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +177 -0
  81. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +69 -0
  82. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +130 -0
  83. xinference/thirdparty/fish_speech/fish_speech/train.py +139 -0
  84. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +23 -0
  85. xinference/thirdparty/fish_speech/fish_speech/utils/braceexpand.py +217 -0
  86. xinference/thirdparty/fish_speech/fish_speech/utils/context.py +13 -0
  87. xinference/thirdparty/fish_speech/fish_speech/utils/file.py +16 -0
  88. xinference/thirdparty/fish_speech/fish_speech/utils/instantiators.py +50 -0
  89. xinference/thirdparty/fish_speech/fish_speech/utils/logger.py +55 -0
  90. xinference/thirdparty/fish_speech/fish_speech/utils/logging_utils.py +48 -0
  91. xinference/thirdparty/fish_speech/fish_speech/utils/rich_utils.py +100 -0
  92. xinference/thirdparty/fish_speech/fish_speech/utils/spectrogram.py +122 -0
  93. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +114 -0
  94. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  95. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +120 -0
  96. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1237 -0
  97. xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
  98. xinference/thirdparty/fish_speech/tools/api.py +495 -0
  99. xinference/thirdparty/fish_speech/tools/auto_rerank.py +159 -0
  100. xinference/thirdparty/fish_speech/tools/download_models.py +55 -0
  101. xinference/thirdparty/fish_speech/tools/extract_model.py +21 -0
  102. xinference/thirdparty/fish_speech/tools/file.py +108 -0
  103. xinference/thirdparty/fish_speech/tools/gen_ref.py +36 -0
  104. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  105. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +169 -0
  106. xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +171 -0
  107. xinference/thirdparty/fish_speech/tools/llama/generate.py +698 -0
  108. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +95 -0
  109. xinference/thirdparty/fish_speech/tools/llama/quantize.py +497 -0
  110. xinference/thirdparty/fish_speech/tools/llama/rebuild_tokenizer.py +57 -0
  111. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +55 -0
  112. xinference/thirdparty/fish_speech/tools/post_api.py +164 -0
  113. xinference/thirdparty/fish_speech/tools/sensevoice/__init__.py +0 -0
  114. xinference/thirdparty/fish_speech/tools/sensevoice/auto_model.py +573 -0
  115. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +332 -0
  116. xinference/thirdparty/fish_speech/tools/sensevoice/vad_utils.py +61 -0
  117. xinference/thirdparty/fish_speech/tools/smart_pad.py +47 -0
  118. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  119. xinference/thirdparty/fish_speech/tools/vqgan/create_train_split.py +83 -0
  120. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +227 -0
  121. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +120 -0
  122. xinference/thirdparty/fish_speech/tools/webui.py +619 -0
  123. xinference/thirdparty/fish_speech/tools/whisper_asr.py +176 -0
  124. xinference/web/ui/build/asset-manifest.json +3 -3
  125. xinference/web/ui/build/index.html +1 -1
  126. xinference/web/ui/build/static/js/{main.ffc26121.js → main.661c7b0a.js} +3 -3
  127. xinference/web/ui/build/static/js/main.661c7b0a.js.map +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +1 -0
  129. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/METADATA +18 -6
  130. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/RECORD +135 -37
  131. xinference/web/ui/build/static/js/main.ffc26121.js.map +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/2f40209b32e7e46a2eab6b8c8a355eb42c3caa8bc3228dd929f32fd2b3940294.json +0 -1
  133. /xinference/web/ui/build/static/js/{main.ffc26121.js.LICENSE.txt → main.661c7b0a.js.LICENSE.txt} +0 -0
  134. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/LICENSE +0 -0
  135. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/WHEEL +0 -0
  136. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/entry_points.txt +0 -0
  137. {xinference-0.14.2.dist-info → xinference-0.14.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,164 @@
1
+ import argparse
2
+ import base64
3
+ import json
4
+ import wave
5
+ from pathlib import Path
6
+
7
+ import pyaudio
8
+ import requests
9
+
10
+
11
def wav_to_base64(file_path):
    """Return the contents of a file as base64 text.

    Args:
        file_path: Path of the audio file to encode. May be ``None`` or empty.

    Returns:
        The base64 encoding of the file's bytes as a ``str``, or ``None``
        when ``file_path`` is falsy or does not name an existing regular file.
    """
    if not file_path:
        return None
    path = Path(file_path)
    # is_file() rather than exists(): a directory "exists" but cannot be
    # opened for reading, so it is treated like a missing file.
    if not path.is_file():
        return None
    return base64.b64encode(path.read_bytes()).decode("utf-8")
18
+
19
+
20
def read_ref_text(ref_text):
    """Resolve the reference text, loading it from a file when it names one.

    Args:
        ref_text: Either the reference text itself, or the path of a UTF-8
            text file that contains it.

    Returns:
        The file's contents when ``ref_text`` is the path of an existing
        regular file; otherwise ``ref_text`` unchanged.
    """
    try:
        names_a_file = Path(ref_text).is_file()
    except (OSError, ValueError):
        # Literal text is frequently not a valid path (for example it may be
        # longer than the filesystem's name limit), and on some Python
        # versions the probe itself raises for such input. A value that
        # cannot be probed cannot name a file, so it is the text.
        return ref_text
    if not names_a_file:
        return ref_text
    return Path(ref_text).read_text(encoding="utf-8")
26
+
27
+
28
def play_audio(audio_content, format, channels, rate):
    """Write audio bytes to a PyAudio output stream and release the device.

    Args:
        audio_content: Audio bytes written to the output stream as-is.
        format: PyAudio sample format constant (e.g. ``pyaudio.paInt16``).
        channels: Number of audio channels for the output stream.
        rate: Sample rate for the output stream.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=format, channels=channels, rate=rate, output=True)
        try:
            stream.write(audio_content)
            stream.stop_stream()
        finally:
            # Close the stream even if playback fails part-way through.
            stream.close()
    finally:
        # Always release PyAudio, including when the stream could not be
        # opened at all.
        p.terminate()
35
+
36
+
37
def _str_to_bool(value):
    """Parse a command-line boolean such as "true", "0" or "no".

    argparse's ``type=bool`` is not usable for this: ``bool("False")`` is
    ``True``, so any non-empty value would switch the option on.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a recognised boolean.
    """
    lowered = value.strip().lower()
    if lowered in ("1", "true", "t", "yes", "y", "on"):
        return True
    if lowered in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Command-line client: posts the text (and optional reference audio/text) to
# the server, then plays the returned audio and saves it to
# "generated_audio.wav".
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Send a WAV file and text to a server and receive synthesized audio."
    )

    parser.add_argument(
        "--url",
        "-u",
        type=str,
        default="http://127.0.0.1:8080/v1/invoke",
        help="URL of the server",
    )
    parser.add_argument(
        "--text", "-t", type=str, required=True, help="Text to be synthesized"
    )
    parser.add_argument(
        "--reference_audio",
        "-ra",
        type=str,
        default=None,
        help="Path to the WAV file",
    )
    parser.add_argument(
        "--reference_text",
        "-rt",
        type=str,
        default=None,
        help="Reference text for voice synthesis",
    )
    parser.add_argument(
        "--max_new_tokens",
        type=int,
        default=1024,
        help="Maximum new tokens to generate",
    )
    parser.add_argument(
        "--chunk_length", type=int, default=100, help="Chunk length for synthesis"
    )
    parser.add_argument(
        "--top_p", type=float, default=0.7, help="Top-p sampling for synthesis"
    )
    parser.add_argument(
        "--repetition_penalty",
        type=float,
        default=1.2,
        help="Repetition penalty for synthesis",
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7, help="Temperature for sampling"
    )
    parser.add_argument(
        "--speaker", type=str, default=None, help="Speaker ID for voice synthesis"
    )
    parser.add_argument("--emotion", type=str, default=None, help="Speaker's Emotion")
    parser.add_argument("--format", type=str, default="wav", help="Audio format")
    # Accepts "--streaming", "--streaming true" and "--streaming false".
    parser.add_argument(
        "--streaming",
        type=_str_to_bool,
        nargs="?",
        const=True,
        default=False,
        help="Enable streaming response",
    )
    parser.add_argument(
        "--channels", type=int, default=1, help="Number of audio channels"
    )
    parser.add_argument("--rate", type=int, default=44100, help="Sample rate for audio")

    args = parser.parse_args()

    base64_audio = wav_to_base64(args.reference_audio)

    # The reference text may be given inline or as the path of a text file.
    ref_text = args.reference_text
    if ref_text:
        ref_text = read_ref_text(ref_text)

    data = {
        "text": args.text,
        "reference_text": ref_text,
        "reference_audio": base64_audio,
        "max_new_tokens": args.max_new_tokens,
        "chunk_length": args.chunk_length,
        "top_p": args.top_p,
        "repetition_penalty": args.repetition_penalty,
        "temperature": args.temperature,
        "speaker": args.speaker,
        "emotion": args.emotion,
        "format": args.format,
        "streaming": args.streaming,
    }

    response = requests.post(args.url, json=data, stream=args.streaming)

    audio_format = pyaudio.paInt16  # Assuming 16-bit PCM format

    if response.status_code == 200:
        if args.streaming:
            # Play each chunk as it arrives while also appending it to the
            # WAV file. Every resource is released even if setup or playback
            # fails part-way through.
            p = pyaudio.PyAudio()
            try:
                stream = p.open(
                    format=audio_format,
                    channels=args.channels,
                    rate=args.rate,
                    output=True,
                )
                try:
                    with wave.open("generated_audio.wav", "wb") as wf:
                        wf.setnchannels(args.channels)
                        wf.setsampwidth(p.get_sample_size(audio_format))
                        wf.setframerate(args.rate)

                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                stream.write(chunk)
                                wf.writeframesraw(chunk)
                    stream.stop_stream()
                finally:
                    stream.close()
            finally:
                p.terminate()
        else:
            audio_content = response.content

            with open("generated_audio.wav", "wb") as audio_file:
                audio_file.write(audio_content)

            # NOTE(review): the whole response body is handed to the raw
            # player; if the server returns a WAV container, its header is
            # played as samples too - confirm whether that is intended.
            play_audio(audio_content, audio_format, args.channels, args.rate)
            print("Audio has been saved to 'generated_audio.wav'.")
    else:
        print(f"Request failed with status code {response.status_code}")
        try:
            print(response.json())
        except ValueError:
            # Error bodies are not guaranteed to be JSON; show the raw text
            # instead of crashing while reporting the failure.
            print(response.text)