xinference 1.5.0.post2__py3-none-any.whl → 1.5.1__py3-none-any.whl
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +10 -3
- xinference/constants.py +5 -1
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +2 -2
- xinference/deploy/cmdline.py +17 -0
- xinference/model/audio/core.py +1 -1
- xinference/model/audio/model_spec.json +43 -43
- xinference/model/audio/model_spec_modelscope.json +13 -13
- xinference/model/llm/__init__.py +3 -5
- xinference/model/llm/core.py +14 -0
- xinference/model/llm/llama_cpp/core.py +15 -4
- xinference/model/llm/llm_family.json +3251 -4304
- xinference/model/llm/llm_family.py +62 -6
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1161 -1789
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +19 -6
- xinference/model/llm/sglang/core.py +25 -10
- xinference/model/llm/transformers/chatglm.py +8 -1
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +50 -58
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +2 -20
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +2 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +28 -0
- xinference/model/llm/vllm/core.py +73 -9
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
- xinference/model/video/diffusers.py +30 -3
- xinference/model/video/model_spec.json +46 -0
- xinference/model/video/model_spec_modelscope.json +48 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +1 -0
- xinference/web/ui/src/locales/zh.json +1 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/__init__.py
CHANGED

@@ -57,7 +57,7 @@ from .llm_family import (
 
 def check_format_with_engine(model_format, engine):
     # only llama-cpp-python support and only support ggufv2
-    if model_format in ["ggufv2"] and engine not in ["llama.cpp"]:
+    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
         return False
     if model_format not in ["ggufv2"] and engine == "llama.cpp":
         return False
@@ -147,13 +147,12 @@ def _install():
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
+    from .transformers.ovis2 import Ovis2ChatModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen_vl import QwenVLChatModel
-    from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
 
     try:
@@ -180,10 +179,8 @@ def _install():
         [
             ChatglmPytorchChatModel,
             PytorchChatModel,
-            Internlm2PytorchChatModel,
             QwenVLChatModel,
             Qwen2AudioChatModel,
-            YiVLChatModel,
             DeepSeekVLChatModel,
             DeepSeekVL2ChatModel,
             PytorchModel,
@@ -199,6 +196,7 @@ def _install():
             CogAgentChatModel,
             Gemma3TextChatModel,
             Gemma3ChatModel,
+            Ovis2ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
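The only behavioral change in check_format_with_engine is that GGUF checkpoints are no longer tied to llama.cpp: the ggufv2 format now also accepts the vLLM engine. Below is a minimal standalone sketch of the resulting truth table; it re-implements the function for illustration and assumes it falls through to return True after the two guard clauses, which is outside the visible hunk.

```python
# Standalone sketch of the 1.5.1 check above (assumption: the real
# function ends with `return True` after the two guards shown).
def check_format_with_engine(model_format: str, engine: str) -> bool:
    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
        return False
    if model_format not in ["ggufv2"] and engine == "llama.cpp":
        return False
    return True

assert check_format_with_engine("ggufv2", "vLLM")       # newly allowed in 1.5.1
assert check_format_with_engine("ggufv2", "llama.cpp")  # unchanged
assert not check_format_with_engine("ggufv2", "Transformers")
assert not check_format_with_engine("pytorch", "llama.cpp")
```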
xinference/model/llm/core.py
CHANGED

@@ -65,6 +65,11 @@ class LLM(abc.ABC):
         if kwargs:
             raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
 
+    @classmethod
+    @abstractmethod
+    def check_lib(cls) -> bool:
+        raise NotImplementedError
+
     @staticmethod
     def _is_darwin_and_apple_silicon():
         return platform.system() == "Darwin" and platform.processor() == "arm"
@@ -117,6 +122,15 @@ class LLM(abc.ABC):
     @classmethod
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not cls.check_lib():
+            return False
+        return cls.match_json(llm_family, llm_spec, quantization)
+
+    @classmethod
+    @abstractmethod
+    def match_json(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         raise NotImplementedError
 
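This hunk turns match from an abstract hook into a concrete template method: it first calls the new check_lib (is the backing library importable at all?) and only then delegates to match_json, which inherits the old spec-matching role. A hypothetical subclass under the new contract could look like the sketch below; MyEngineModel and the my_engine package are invented names, and the import paths follow the files shown in this diff.

```python
import importlib.util

from xinference.model.llm.core import LLM
from xinference.model.llm.llm_family import LLMFamilyV1, LLMSpecV1


class MyEngineModel(LLM):
    """Hypothetical engine class following the 1.5.1 contract."""

    @classmethod
    def check_lib(cls) -> bool:
        # Cheap availability probe; "my_engine" is a placeholder package.
        return importlib.util.find_spec("my_engine") is not None

    @classmethod
    def match_json(
        cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
    ) -> bool:
        # Spec-level matching only; LLM.match() has already verified the
        # library via check_lib() before this method is reached.
        return llm_spec.model_format == "pytorch"
```

Splitting the check this way keeps an uninstalled backend from ever being offered as an engine, without each subclass repeating the import probe inside its matching logic; the llama.cpp hunks below show the same pattern applied in-tree.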
xinference/model/llm/llama_cpp/core.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import concurrent.futures
+import importlib.util
 import logging
 import os
 import queue
@@ -116,7 +117,11 @@ class XllamaCppModel(LLM, ChatModelMixin):
         return generate_config
 
     @classmethod
-    def match(
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("xllamacpp") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -464,7 +469,11 @@ class LlamaCppModel(LLM):
         raise RuntimeError(f"Load model {self.model_family.model_name} failed")
 
     @classmethod
-    def match(
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("llama_cpp") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -565,7 +574,7 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         )
 
     @classmethod
-    def match(
+    def match_json(
         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
     ) -> bool:
         if llm_spec.model_format not in ["ggufv2"]:
@@ -589,7 +598,9 @@
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
-        full_context_kwargs = {}
+        full_context_kwargs = (
+            self._get_chat_template_kwargs_from_generate_config(generate_config) or {}  # type: ignore
+        )
         if tools:
             if (
                 model_family in QWEN_TOOL_CALL_FAMILY