xinference 1.5.0.post1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (89)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +10 -3
  3. xinference/constants.py +5 -1
  4. xinference/core/supervisor.py +12 -3
  5. xinference/core/utils.py +1 -1
  6. xinference/core/worker.py +2 -2
  7. xinference/deploy/cmdline.py +17 -0
  8. xinference/model/audio/core.py +1 -1
  9. xinference/model/audio/model_spec.json +43 -43
  10. xinference/model/audio/model_spec_modelscope.json +13 -13
  11. xinference/model/llm/__init__.py +3 -5
  12. xinference/model/llm/core.py +14 -0
  13. xinference/model/llm/llama_cpp/core.py +15 -4
  14. xinference/model/llm/llm_family.json +3251 -4304
  15. xinference/model/llm/llm_family.py +62 -6
  16. xinference/model/llm/llm_family_csghub.json +0 -32
  17. xinference/model/llm/llm_family_modelscope.json +1161 -1789
  18. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  19. xinference/model/llm/lmdeploy/core.py +7 -2
  20. xinference/model/llm/mlx/core.py +19 -6
  21. xinference/model/llm/sglang/core.py +25 -10
  22. xinference/model/llm/transformers/chatglm.py +8 -1
  23. xinference/model/llm/transformers/cogagent.py +10 -12
  24. xinference/model/llm/transformers/cogvlm2.py +6 -3
  25. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  26. xinference/model/llm/transformers/core.py +50 -58
  27. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  28. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  29. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  30. xinference/model/llm/transformers/gemma3.py +15 -7
  31. xinference/model/llm/transformers/glm4v.py +2 -20
  32. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  33. xinference/model/llm/transformers/intern_vl.py +3 -6
  34. xinference/model/llm/transformers/internlm2.py +1 -1
  35. xinference/model/llm/transformers/minicpmv25.py +4 -2
  36. xinference/model/llm/transformers/minicpmv26.py +5 -3
  37. xinference/model/llm/transformers/omnilmm.py +1 -1
  38. xinference/model/llm/transformers/opt.py +1 -1
  39. xinference/model/llm/transformers/ovis2.py +302 -0
  40. xinference/model/llm/transformers/qwen-omni.py +2 -1
  41. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  42. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  43. xinference/model/llm/transformers/qwen_vl.py +5 -2
  44. xinference/model/llm/utils.py +28 -0
  45. xinference/model/llm/vllm/core.py +73 -9
  46. xinference/model/llm/vllm/distributed_executor.py +8 -7
  47. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  48. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  49. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  50. xinference/model/llm/vllm/xavier/executor.py +1 -1
  51. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
  52. xinference/model/video/diffusers.py +30 -3
  53. xinference/model/video/model_spec.json +46 -0
  54. xinference/model/video/model_spec_modelscope.json +48 -0
  55. xinference/types.py +2 -0
  56. xinference/web/ui/build/asset-manifest.json +6 -6
  57. xinference/web/ui/build/index.html +1 -1
  58. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  59. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  60. xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
  61. xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  69. xinference/web/ui/src/locales/en.json +1 -0
  70. xinference/web/ui/src/locales/zh.json +1 -0
  71. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
  72. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
  73. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
  74. xinference/model/llm/transformers/compression.py +0 -258
  75. xinference/model/llm/transformers/yi_vl.py +0 -239
  76. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.58bd483c.js +0 -3
  78. xinference/web/ui/build/static/js/main.58bd483c.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  86. xinference/web/ui/build/static/js/{main.58bd483c.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
  87. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
  88. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
  89. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
@@ -57,7 +57,7 @@ from .llm_family import (
57
57
 
58
58
  def check_format_with_engine(model_format, engine):
59
59
  # only llama-cpp-python support and only support ggufv2
60
- if model_format in ["ggufv2"] and engine != "llama.cpp":
60
+ if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
61
61
  return False
62
62
  if model_format not in ["ggufv2"] and engine == "llama.cpp":
63
63
  return False
@@ -147,13 +147,12 @@ def _install():
147
147
  from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
148
148
  from .transformers.glm4v import Glm4VModel
149
149
  from .transformers.glm_edge_v import GlmEdgeVModel
150
- from .transformers.internlm2 import Internlm2PytorchChatModel
151
150
  from .transformers.minicpmv25 import MiniCPMV25Model
152
151
  from .transformers.minicpmv26 import MiniCPMV26Model
153
152
  from .transformers.opt import OptPytorchModel
153
+ from .transformers.ovis2 import Ovis2ChatModel
154
154
  from .transformers.qwen2_audio import Qwen2AudioChatModel
155
155
  from .transformers.qwen_vl import QwenVLChatModel
156
- from .transformers.yi_vl import YiVLChatModel
157
156
  from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
158
157
 
159
158
  try:
@@ -180,10 +179,8 @@ def _install():
180
179
  [
181
180
  ChatglmPytorchChatModel,
182
181
  PytorchChatModel,
183
- Internlm2PytorchChatModel,
184
182
  QwenVLChatModel,
185
183
  Qwen2AudioChatModel,
186
- YiVLChatModel,
187
184
  DeepSeekVLChatModel,
188
185
  DeepSeekVL2ChatModel,
189
186
  PytorchModel,
@@ -199,6 +196,7 @@ def _install():
199
196
  CogAgentChatModel,
200
197
  Gemma3TextChatModel,
201
198
  Gemma3ChatModel,
199
+ Ovis2ChatModel,
202
200
  ]
203
201
  )
204
202
  if OmniLMMModel: # type: ignore
@@ -65,6 +65,11 @@ class LLM(abc.ABC):
65
65
  if kwargs:
66
66
  raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
67
67
 
68
+ @classmethod
69
+ @abstractmethod
70
+ def check_lib(cls) -> bool:
71
+ raise NotImplementedError
72
+
68
73
  @staticmethod
69
74
  def _is_darwin_and_apple_silicon():
70
75
  return platform.system() == "Darwin" and platform.processor() == "arm"
@@ -117,6 +122,15 @@ class LLM(abc.ABC):
117
122
  @classmethod
118
123
  def match(
119
124
  cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
125
+ ) -> bool:
126
+ if not cls.check_lib():
127
+ return False
128
+ return cls.match_json(llm_family, llm_spec, quantization)
129
+
130
+ @classmethod
131
+ @abstractmethod
132
+ def match_json(
133
+ cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
120
134
  ) -> bool:
121
135
  raise NotImplementedError
122
136
 
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import concurrent.futures
15
+ import importlib.util
15
16
  import logging
16
17
  import os
17
18
  import queue
@@ -116,7 +117,11 @@ class XllamaCppModel(LLM, ChatModelMixin):
116
117
  return generate_config
117
118
 
118
119
  @classmethod
119
- def match(
120
+ def check_lib(cls) -> bool:
121
+ return importlib.util.find_spec("xllamacpp") is not None
122
+
123
+ @classmethod
124
+ def match_json(
120
125
  cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
121
126
  ) -> bool:
122
127
  if llm_spec.model_format not in ["ggufv2"]:
@@ -464,7 +469,11 @@ class LlamaCppModel(LLM):
464
469
  raise RuntimeError(f"Load model {self.model_family.model_name} failed")
465
470
 
466
471
  @classmethod
467
- def match(
472
+ def check_lib(cls) -> bool:
473
+ return importlib.util.find_spec("llama_cpp") is not None
474
+
475
+ @classmethod
476
+ def match_json(
468
477
  cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
469
478
  ) -> bool:
470
479
  if llm_spec.model_format not in ["ggufv2"]:
@@ -565,7 +574,7 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
565
574
  )
566
575
 
567
576
  @classmethod
568
- def match(
577
+ def match_json(
569
578
  cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
570
579
  ) -> bool:
571
580
  if llm_spec.model_format not in ["ggufv2"]:
@@ -589,7 +598,9 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
589
598
  ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
590
599
  model_family = self.model_family.model_family or self.model_family.model_name
591
600
  tools = generate_config.pop("tools", []) if generate_config else None
592
- full_context_kwargs = {}
601
+ full_context_kwargs = (
602
+ self._get_chat_template_kwargs_from_generate_config(generate_config) or {} # type: ignore
603
+ )
593
604
  if tools:
594
605
  if (
595
606
  model_family in QWEN_TOOL_CALL_FAMILY