xinference 1.6.0.post1__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (87)
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/restful_client.py +1 -1
  3. xinference/conftest.py +0 -7
  4. xinference/core/media_interface.py +9 -8
  5. xinference/core/model.py +13 -6
  6. xinference/core/scheduler.py +1 -10
  7. xinference/core/worker.py +0 -10
  8. xinference/model/audio/model_spec.json +53 -1
  9. xinference/model/audio/model_spec_modelscope.json +57 -1
  10. xinference/model/embedding/core.py +19 -11
  11. xinference/model/image/model_spec.json +10 -1
  12. xinference/model/image/model_spec_modelscope.json +20 -0
  13. xinference/model/llm/__init__.py +6 -54
  14. xinference/model/llm/core.py +19 -5
  15. xinference/model/llm/llama_cpp/core.py +59 -3
  16. xinference/model/llm/llama_cpp/memory.py +455 -0
  17. xinference/model/llm/llm_family.json +185 -397
  18. xinference/model/llm/llm_family.py +88 -16
  19. xinference/model/llm/llm_family_modelscope.json +199 -421
  20. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  21. xinference/model/llm/sglang/core.py +4 -0
  22. xinference/model/llm/transformers/__init__.py +27 -6
  23. xinference/model/llm/transformers/chatglm.py +4 -2
  24. xinference/model/llm/transformers/core.py +49 -28
  25. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  26. xinference/model/llm/transformers/gemma3.py +119 -164
  27. xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
  28. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  29. xinference/model/llm/transformers/multimodal/core.py +205 -0
  30. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  31. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  32. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  33. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  34. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  35. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  36. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  37. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  38. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  39. xinference/model/llm/transformers/opt.py +4 -2
  40. xinference/model/llm/transformers/utils.py +6 -37
  41. xinference/model/llm/vllm/core.py +4 -0
  42. xinference/model/rerank/core.py +7 -1
  43. xinference/model/rerank/utils.py +17 -0
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
  47. xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
  52. xinference/web/ui/src/locales/en.json +3 -1
  53. xinference/web/ui/src/locales/zh.json +3 -1
  54. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/METADATA +6 -4
  55. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
  56. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
  57. xinference/model/llm/transformers/cogvlm2.py +0 -442
  58. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  59. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  60. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  61. xinference/model/llm/transformers/intern_vl.py +0 -526
  62. xinference/model/llm/transformers/internlm2.py +0 -94
  63. xinference/model/llm/transformers/minicpmv25.py +0 -193
  64. xinference/model/llm/transformers/omnilmm.py +0 -132
  65. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  66. xinference/model/llm/transformers/qwen_vl.py +0 -360
  67. xinference/thirdparty/omnilmm/LICENSE +0 -201
  68. xinference/thirdparty/omnilmm/__init__.py +0 -0
  69. xinference/thirdparty/omnilmm/chat.py +0 -218
  70. xinference/thirdparty/omnilmm/constants.py +0 -4
  71. xinference/thirdparty/omnilmm/conversation.py +0 -332
  72. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  73. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  74. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  75. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  76. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  77. xinference/thirdparty/omnilmm/utils.py +0 -134
  78. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  79. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  84. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
  85. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
  86. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
  87. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/glm_edge_v.py
@@ -1,213 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- import uuid
- from concurrent.futures import ThreadPoolExecutor
- from threading import Thread
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
-
- import torch
-
- from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
- from ...utils import select_device
- from ..llm_family import LLMFamilyV1, LLMSpecV1
- from ..utils import (
-     _decode_image_without_rgb,
-     generate_chat_completion,
-     generate_completion_chunk,
- )
- from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import cache_clean
-
- logger = logging.getLogger(__name__)
-
-
- class GlmEdgeVModel(PytorchChatModel):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self._device = None
-         self._tokenizer = None
-         self._model = None
-         self._processor = None
-
-     @classmethod
-     def match_json(
-         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
-     ) -> bool:
-         family = model_family.model_family or model_family.model_name
-         if "glm-edge-v" in family.lower():
-             return True
-         return False
-
-     def load(self):
-         from transformers import AutoImageProcessor, AutoModelForCausalLM, AutoTokenizer
-
-         device = self._pytorch_model_config.get("device", "auto")
-         self._device = select_device(device)
-
-         kwargs = {"device_map": self._device}
-         kwargs = self.apply_bnb_quantization(kwargs)
-
-         processor = AutoImageProcessor.from_pretrained(
-             self.model_path, trust_remote_code=True
-         )
-         self._processor = processor
-
-         model = AutoModelForCausalLM.from_pretrained(
-             self.model_path,
-             trust_remote_code=True,
-             torch_dtype=torch.bfloat16,
-             device_map="auto",
-             **kwargs
-         )
-
-         self._model = model
-
-         tokenizer = AutoTokenizer.from_pretrained(
-             self.model_path, trust_remote_code=True
-         )
-         self._tokenizer = tokenizer
-
-     @staticmethod
-     def _get_processed_msgs(
-         messages: List[Dict],
-     ) -> Tuple[List[Dict[str, Any]], List[Any]]:
-         res = []
-         img = []
-         for message in messages:
-             role = message["role"]
-             content = message["content"]
-             if isinstance(content, str):
-                 res.append({"role": role, "content": content})
-             else:
-                 texts = []
-                 image_urls = []
-                 for c in content:
-                     c_type = c.get("type")
-                     if c_type == "text":
-                         texts.append(c["text"])
-                     else:
-                         assert (
-                             c_type == "image_url"
-                         ), "Please follow the image input of the OpenAI API."
-                         image_urls.append(c["image_url"]["url"])
-                 if len(image_urls) > 1:
-                     raise RuntimeError("Only one image per message is supported")
-                 image_futures = []
-                 with ThreadPoolExecutor() as executor:
-                     for image_url in image_urls:
-                         fut = executor.submit(_decode_image_without_rgb, image_url)
-                         image_futures.append(fut)
-                 images = [fut.result() for fut in image_futures]
-                 assert len(images) <= 1
-                 text = " ".join(texts)
-                 img.extend(images)
-                 if images:
-                     res.append(
-                         {
-                             "role": role,
-                             "content": [
-                                 {"type": "image"},
-                                 {"type": "text", "text": text},
-                             ],
-                         }
-                     )
-                 else:
-                     res.append({"role": role, "content": text})
-         return res, img
-
-     @cache_clean
-     def chat(
-         self,
-         messages: List[Dict],
-         generate_config: Optional[PytorchGenerateConfig] = None,
-     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-         from transformers import TextIteratorStreamer
-
-         if not generate_config:
-             generate_config = {}
-
-         stream = generate_config.get("stream", False)
-         msgs, imgs = self._get_processed_msgs(messages)
-
-         inputs = self._tokenizer.apply_chat_template(
-             msgs,
-             add_generation_prompt=True,
-             tokenize=True,
-             return_tensors="pt",
-             return_dict=True,
-         )  # chat mode
-         inputs = inputs.to(self._model.device)
-
-         generate_kwargs = {
-             **inputs,
-         }
-         if len(imgs) > 0:
-             generate_kwargs["pixel_values"] = torch.tensor(
-                 self._processor(imgs[-1]).pixel_values
-             ).to(self._model.device)
-         stop_str = "<|endoftext|>"
-
-         if stream:
-             streamer = TextIteratorStreamer(
-                 tokenizer=self._tokenizer,
-                 timeout=60,
-                 skip_prompt=True,
-                 skip_special_tokens=True,
-             )
-             generate_kwargs = {
-                 **generate_kwargs,
-                 "streamer": streamer,
-             }
-             t = Thread(target=self._model.generate, kwargs=generate_kwargs)
-             t.start()
-
-             it = self.chat_stream(streamer, stop_str)
-             return self._to_chat_completion_chunks(it)
-         else:
-             with torch.no_grad():
-                 outputs = self._model.generate(**generate_kwargs)
-             outputs = outputs[0][len(inputs["input_ids"][0]) :]
-             response = self._tokenizer.decode(outputs)
-             if response.endswith(stop_str):
-                 response = response[: -len(stop_str)]
-             return generate_chat_completion(self.model_uid, response)
-
-     def chat_stream(self, streamer, stop_str) -> Iterator[CompletionChunk]:
-         completion_id = str(uuid.uuid1())
-         for new_text in streamer:
-             if not new_text.endswith(stop_str):
-                 yield generate_completion_chunk(
-                     chunk_text=new_text,
-                     finish_reason=None,
-                     chunk_id=completion_id,
-                     model_uid=self.model_uid,
-                     prompt_tokens=-1,
-                     completion_tokens=-1,
-                     total_tokens=-1,
-                     has_choice=True,
-                     has_content=True,
-                 )
-
-         yield generate_completion_chunk(
-             chunk_text=None,
-             finish_reason="stop",
-             chunk_id=completion_id,
-             model_uid=self.model_uid,
-             prompt_tokens=-1,
-             completion_tokens=-1,
-             total_tokens=-1,
-             has_choice=True,
-             has_content=False,
-         )