xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
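The most visible changes in 1.7.0 are the split of the embedding backend into flag, sentence_transformers and vllm sub-packages, the move of the vision/audio chat models under transformers/multimodal, and the removal of the bundled omnilmm third-party code. To confirm which wheel is actually installed after upgrading, a minimal check using only the Python standard library (nothing xinference-specific is assumed beyond the distribution name):

    from importlib.metadata import version  # stdlib, Python 3.8+

    # Expect "1.6.0.post1" before the upgrade and "1.7.0" after it,
    # matching the dist-info directories listed above.
    print(version("xinference"))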
xinference/model/llm/transformers/glm_edge_v.py (deleted)
@@ -1,213 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- import uuid
- from concurrent.futures import ThreadPoolExecutor
- from threading import Thread
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
-
- import torch
-
- from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
- from ...utils import select_device
- from ..llm_family import LLMFamilyV1, LLMSpecV1
- from ..utils import (
-     _decode_image_without_rgb,
-     generate_chat_completion,
-     generate_completion_chunk,
- )
- from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import cache_clean
-
- logger = logging.getLogger(__name__)
-
-
- class GlmEdgeVModel(PytorchChatModel):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self._device = None
-         self._tokenizer = None
-         self._model = None
-         self._processor = None
-
-     @classmethod
-     def match_json(
-         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
-     ) -> bool:
-         family = model_family.model_family or model_family.model_name
-         if "glm-edge-v" in family.lower():
-             return True
-         return False
-
-     def load(self):
-         from transformers import AutoImageProcessor, AutoModelForCausalLM, AutoTokenizer
-
-         device = self._pytorch_model_config.get("device", "auto")
-         self._device = select_device(device)
-
-         kwargs = {"device_map": self._device}
-         kwargs = self.apply_bnb_quantization(kwargs)
-
-         processor = AutoImageProcessor.from_pretrained(
-             self.model_path, trust_remote_code=True
-         )
-         self._processor = processor
-
-         model = AutoModelForCausalLM.from_pretrained(
-             self.model_path,
-             trust_remote_code=True,
-             torch_dtype=torch.bfloat16,
-             device_map="auto",
-             **kwargs
-         )
-
-         self._model = model
-
-         tokenizer = AutoTokenizer.from_pretrained(
-             self.model_path, trust_remote_code=True
-         )
-         self._tokenizer = tokenizer
-
-     @staticmethod
-     def _get_processed_msgs(
-         messages: List[Dict],
-     ) -> Tuple[List[Dict[str, Any]], List[Any]]:
-         res = []
-         img = []
-         for message in messages:
-             role = message["role"]
-             content = message["content"]
-             if isinstance(content, str):
-                 res.append({"role": role, "content": content})
-             else:
-                 texts = []
-                 image_urls = []
-                 for c in content:
-                     c_type = c.get("type")
-                     if c_type == "text":
-                         texts.append(c["text"])
-                     else:
-                         assert (
-                             c_type == "image_url"
-                         ), "Please follow the image input of the OpenAI API."
-                         image_urls.append(c["image_url"]["url"])
-                 if len(image_urls) > 1:
-                     raise RuntimeError("Only one image per message is supported")
-                 image_futures = []
-                 with ThreadPoolExecutor() as executor:
-                     for image_url in image_urls:
-                         fut = executor.submit(_decode_image_without_rgb, image_url)
-                         image_futures.append(fut)
-                 images = [fut.result() for fut in image_futures]
-                 assert len(images) <= 1
-                 text = " ".join(texts)
-                 img.extend(images)
-                 if images:
-                     res.append(
-                         {
-                             "role": role,
-                             "content": [
-                                 {"type": "image"},
-                                 {"type": "text", "text": text},
-                             ],
-                         }
-                     )
-                 else:
-                     res.append({"role": role, "content": text})
-         return res, img
-
-     @cache_clean
-     def chat(
-         self,
-         messages: List[Dict],
-         generate_config: Optional[PytorchGenerateConfig] = None,
-     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-         from transformers import TextIteratorStreamer
-
-         if not generate_config:
-             generate_config = {}
-
-         stream = generate_config.get("stream", False)
-         msgs, imgs = self._get_processed_msgs(messages)
-
-         inputs = self._tokenizer.apply_chat_template(
-             msgs,
-             add_generation_prompt=True,
-             tokenize=True,
-             return_tensors="pt",
-             return_dict=True,
-         )  # chat mode
-         inputs = inputs.to(self._model.device)
-
-         generate_kwargs = {
-             **inputs,
-         }
-         if len(imgs) > 0:
-             generate_kwargs["pixel_values"] = torch.tensor(
-                 self._processor(imgs[-1]).pixel_values
-             ).to(self._model.device)
-         stop_str = "<|endoftext|>"
-
-         if stream:
-             streamer = TextIteratorStreamer(
-                 tokenizer=self._tokenizer,
-                 timeout=60,
-                 skip_prompt=True,
-                 skip_special_tokens=True,
-             )
-             generate_kwargs = {
-                 **generate_kwargs,
-                 "streamer": streamer,
-             }
-             t = Thread(target=self._model.generate, kwargs=generate_kwargs)
-             t.start()
-
-             it = self.chat_stream(streamer, stop_str)
-             return self._to_chat_completion_chunks(it)
-         else:
-             with torch.no_grad():
-                 outputs = self._model.generate(**generate_kwargs)
-             outputs = outputs[0][len(inputs["input_ids"][0]) :]
-             response = self._tokenizer.decode(outputs)
-             if response.endswith(stop_str):
-                 response = response[: -len(stop_str)]
-             return generate_chat_completion(self.model_uid, response)
-
-     def chat_stream(self, streamer, stop_str) -> Iterator[CompletionChunk]:
-         completion_id = str(uuid.uuid1())
-         for new_text in streamer:
-             if not new_text.endswith(stop_str):
-                 yield generate_completion_chunk(
-                     chunk_text=new_text,
-                     finish_reason=None,
-                     chunk_id=completion_id,
-                     model_uid=self.model_uid,
-                     prompt_tokens=-1,
-                     completion_tokens=-1,
-                     total_tokens=-1,
-                     has_choice=True,
-                     has_content=True,
-                 )
-
-         yield generate_completion_chunk(
-             chunk_text=None,
-             finish_reason="stop",
-             chunk_id=completion_id,
-             model_uid=self.model_uid,
-             prompt_tokens=-1,
-             completion_tokens=-1,
-             total_tokens=-1,
-             has_choice=True,
-             has_content=False,
-         )