xinference-0.14.4.post1-py3-none-any.whl → xinference-0.15.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_compat.py +51 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +209 -40
- xinference/client/restful/restful_client.py +7 -26
- xinference/conftest.py +1 -1
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +8 -14
- xinference/core/event.py +1 -1
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +110 -31
- xinference/core/scheduler.py +37 -37
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +17 -10
- xinference/core/utils.py +80 -22
- xinference/core/worker.py +17 -16
- xinference/deploy/cmdline.py +8 -16
- xinference/deploy/local.py +1 -1
- xinference/deploy/supervisor.py +1 -1
- xinference/deploy/utils.py +1 -1
- xinference/deploy/worker.py +1 -1
- xinference/model/audio/cosyvoice.py +86 -41
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/embedding/core.py +52 -31
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +164 -19
- xinference/model/llm/__init__.py +29 -11
- xinference/model/llm/llama_cpp/core.py +16 -33
- xinference/model/llm/llm_family.json +1011 -1296
- xinference/model/llm/llm_family.py +34 -53
- xinference/model/llm/llm_family_csghub.json +18 -35
- xinference/model/llm/llm_family_modelscope.json +981 -1122
- xinference/model/llm/lmdeploy/core.py +56 -88
- xinference/model/llm/mlx/core.py +46 -69
- xinference/model/llm/sglang/core.py +36 -18
- xinference/model/llm/transformers/chatglm.py +168 -306
- xinference/model/llm/transformers/cogvlm2.py +36 -63
- xinference/model/llm/transformers/cogvlm2_video.py +33 -223
- xinference/model/llm/transformers/core.py +55 -50
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/deepseek_vl.py +53 -96
- xinference/model/llm/transformers/glm4v.py +55 -111
- xinference/model/llm/transformers/intern_vl.py +39 -70
- xinference/model/llm/transformers/internlm2.py +32 -54
- xinference/model/llm/transformers/minicpmv25.py +22 -55
- xinference/model/llm/transformers/minicpmv26.py +158 -68
- xinference/model/llm/transformers/omnilmm.py +5 -28
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +234 -0
- xinference/model/llm/transformers/qwen_vl.py +34 -86
- xinference/model/llm/transformers/utils.py +32 -38
- xinference/model/llm/transformers/yi_vl.py +32 -72
- xinference/model/llm/utils.py +280 -554
- xinference/model/llm/vllm/core.py +161 -100
- xinference/model/rerank/core.py +41 -8
- xinference/model/rerank/model_spec.json +7 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -1
- xinference/model/utils.py +1 -31
- xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
- xinference/thirdparty/cosyvoice/cli/model.py +139 -26
- xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
- xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
- xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
- xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
- xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
- xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
- xinference/thirdparty/cosyvoice/utils/common.py +36 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
- xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/thirdparty/matcha/VERSION +1 -0
- xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
- xinference/thirdparty/matcha/hifigan/README.md +101 -0
- xinference/thirdparty/omnilmm/LICENSE +201 -0
- xinference/thirdparty/whisper/__init__.py +156 -0
- xinference/thirdparty/whisper/__main__.py +3 -0
- xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
- xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
- xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
- xinference/thirdparty/whisper/audio.py +157 -0
- xinference/thirdparty/whisper/decoding.py +826 -0
- xinference/thirdparty/whisper/model.py +314 -0
- xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
- xinference/thirdparty/whisper/normalizers/basic.py +76 -0
- xinference/thirdparty/whisper/normalizers/english.json +1741 -0
- xinference/thirdparty/whisper/normalizers/english.py +550 -0
- xinference/thirdparty/whisper/timing.py +386 -0
- xinference/thirdparty/whisper/tokenizer.py +395 -0
- xinference/thirdparty/whisper/transcribe.py +605 -0
- xinference/thirdparty/whisper/triton_ops.py +109 -0
- xinference/thirdparty/whisper/utils.py +316 -0
- xinference/thirdparty/whisper/version.py +1 -0
- xinference/types.py +14 -53
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/main.754740c0.js +3 -0
- xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +37 -0
- xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
- xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
- xinference/web/ui/node_modules/nunjucks/package.json +112 -0
- xinference/web/ui/package-lock.json +38 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
- xinference/model/llm/transformers/llama_2.py +0 -108
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
- xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
- xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/yi_vl.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from threading import Thread
@@ -21,17 +20,14 @@ from typing import Dict, Iterator, List, Optional, Union
 import torch
 
 from ....model.utils import select_device
-from ....types import (
-    ChatCompletion,
-    ChatCompletionChunk,
-    ChatCompletionMessage,
-    Completion,
-    CompletionChoice,
-    CompletionChunk,
-    CompletionUsage,
-)
+from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import
+from ..utils import (
+    _decode_image,
+    generate_chat_completion,
+    generate_completion_chunk,
+    parse_messages,
+)
 from .core import PytorchChatModel, PytorchGenerateConfig
 
 logger = logging.getLogger(__name__)
@@ -105,15 +101,11 @@ class YiVLChatModel(PytorchChatModel):
 
     def chat(
         self,
-        prompt: str,
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        messages: List[Dict],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         from transformers import TextIteratorStreamer
 
-        # TODO(codingl2k1): implement stream mode.
-
         if not generate_config:
             generate_config = {}
 
@@ -134,7 +126,8 @@ class YiVLChatModel(PytorchChatModel):
 
         # Convert chat history to llava state
        state = conv_templates["mm_default"].copy()
-
+        prompt, _, chat_history = parse_messages(messages)
+        for message in chat_history:
             content = self._message_content_to_yi(message["content"])
             state.append_message(message["role"], content)
         state.append_message(state.roles[0], self._message_content_to_yi(prompt))
@@ -190,31 +183,15 @@ class YiVLChatModel(PytorchChatModel):
             it = self._generate_stream(streamer, stop_str, input_ids, include_usage)
             return self._to_chat_completion_chunks(it)
         else:
-            c = self._generate(streamer, stop_str)
-            return self._to_chat_completion(c)
+            return self._generate(streamer, stop_str)
 
-    def _generate(self, streamer, stop_str) -> Completion:
+    def _generate(self, streamer, stop_str) -> ChatCompletion:
         generated_text = ""
         for new_text in streamer:
             generated_text += new_text
             if generated_text.endswith(stop_str):
                 generated_text = generated_text[: -len(stop_str)]
-
-        c = Completion(
-            id=str(uuid.uuid1()),
-            object="text_completion",
-            created=int(time.time()),
-            model=self.model_uid,
-            choices=[
-                CompletionChoice(
-                    index=0, text=generated_text, finish_reason="stop", logprobs=None
-                )
-            ],
-            usage=CompletionUsage(
-                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-            ),
-        )
-        return c
+        return generate_chat_completion(self.model_uid, generated_text)
 
     def _generate_stream(
         self, streamer, stop_str, input_ids, include_usage
@@ -224,54 +201,37 @@ class YiVLChatModel(PytorchChatModel):
         prompt_tokens = len(input_ids[0])
         for i, new_text in enumerate(streamer):
             if not new_text.endswith(stop_str):
-                completion_choice = CompletionChoice(
-                    text=new_text, index=0, logprobs=None, finish_reason=None
-                )
-                chunk = CompletionChunk(
-                    id=completion_id,
-                    object="text_completion",
-                    created=int(time.time()),
-                    model=self.model_uid,
-                    choices=[completion_choice],
-                )
                 completion_tokens = i
                 total_tokens = prompt_tokens + completion_tokens
-                completion_usage = CompletionUsage(
+                yield generate_completion_chunk(
+                    chunk_text=new_text,
+                    finish_reason=None,
+                    chunk_id=completion_id,
+                    model_uid=self.model_uid,
                     prompt_tokens=prompt_tokens,
                     completion_tokens=completion_tokens,
                     total_tokens=total_tokens,
                 )
-
-
-
-
-
-        )
-        chunk = CompletionChunk(
-            id=completion_id,
-            object="text_completion",
-            created=int(time.time()),
-            model=self.model_uid,
-            choices=[completion_choice],
-        )
-        completion_usage = CompletionUsage(
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
             prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
             total_tokens=total_tokens,
+            has_choice=True,
+            has_content=False,
         )
-        chunk["usage"] = completion_usage
-        yield chunk
         if include_usage:
-            chunk = CompletionChunk(
-                id=completion_id,
-                object="text_completion",
-                created=int(time.time()),
-                model=self.model_uid,
-                choices=[],
-            )
-            chunk["usage"] = CompletionUsage(
+            yield generate_completion_chunk(
+                chunk_text=None,
+                finish_reason=None,
+                chunk_id=completion_id,
+                model_uid=self.model_uid,
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=total_tokens,
+                has_choice=False,
+                has_content=False,
             )
-            yield chunk