xinference 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (64) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +16 -11
  5. xinference/client/restful/restful_client.py +12 -2
  6. xinference/conftest.py +13 -2
  7. xinference/constants.py +2 -0
  8. xinference/core/supervisor.py +32 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/llm/__init__.py +6 -0
  12. xinference/model/llm/llm_family.json +711 -10
  13. xinference/model/llm/llm_family_modelscope.json +557 -7
  14. xinference/model/llm/pytorch/chatglm.py +2 -1
  15. xinference/model/llm/pytorch/core.py +2 -0
  16. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  17. xinference/model/llm/pytorch/internlm2.py +2 -1
  18. xinference/model/llm/pytorch/omnilmm.py +153 -0
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +46 -13
  22. xinference/model/llm/vllm/core.py +10 -0
  23. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  24. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  25. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  26. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  27. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  28. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  29. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  30. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  31. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  32. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  33. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  34. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  35. xinference/thirdparty/omnilmm/__init__.py +0 -0
  36. xinference/thirdparty/omnilmm/chat.py +216 -0
  37. xinference/thirdparty/omnilmm/constants.py +4 -0
  38. xinference/thirdparty/omnilmm/conversation.py +332 -0
  39. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  40. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  41. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  42. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  43. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  44. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  45. xinference/thirdparty/omnilmm/utils.py +134 -0
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.98516614.js +3 -0
  49. xinference/web/ui/build/static/js/main.98516614.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +1 -0
  54. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/METADATA +21 -5
  55. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/RECORD +60 -31
  56. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  57. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  60. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.98516614.js.LICENSE.txt} +0 -0
  61. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/LICENSE +0 -0
  62. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/WHEEL +0 -0
  63. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/entry_points.txt +0 -0
  64. {xinference-0.9.3.dist-info → xinference-0.10.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,232 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import base64
15
+ import logging
16
+ import os.path
17
+ import tempfile
18
+ import time
19
+ import uuid
20
+ from concurrent.futures import ThreadPoolExecutor
21
+ from io import BytesIO
22
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
23
+
24
+ import requests
25
+ import torch
26
+
27
+ from ....model.utils import select_device
28
+ from ....types import (
29
+ ChatCompletion,
30
+ ChatCompletionChoice,
31
+ ChatCompletionChunk,
32
+ ChatCompletionMessage,
33
+ CompletionUsage,
34
+ )
35
+ from ..llm_family import LLMFamilyV1, LLMSpecV1
36
+ from .core import PytorchChatModel, PytorchGenerateConfig
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
class DeepSeekVLChatModel(PytorchChatModel):
    """Chat wrapper around a DeepSeek-VL vision-language checkpoint.

    The model is loaded lazily by :meth:`load`; :meth:`chat` converts
    OpenAI-style messages (plain strings or lists of ``text`` /
    ``image_url`` parts) into the conversation format expected by the
    bundled ``deepseek_vl`` processor and returns a non-streaming
    ``ChatCompletion``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # All of these are populated by load().
        self._tokenizer = None
        self._model = None
        self._vl_chat_processor = None
        self._type = None

    @classmethod
    def match(
        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
    ) -> bool:
        """Return True when the family name contains ``"deepseek"``."""
        # NOTE(review): this is a plain substring test, so it also matches any
        # non-VL family whose name contains "deepseek" -- confirm that the
        # registration order of model classes makes this safe.
        return "deepseek" in model_family.model_name

    def load(self):
        """Load the chat processor, tokenizer and model from ``self.model_path``."""
        from transformers import AutoModelForCausalLM

        from ....thirdparty.deepseek_vl.models import (
            MultiModalityCausalLM,
            VLChatProcessor,
        )

        self._device = self._pytorch_model_config.get("device", "auto")
        self._device = select_device(self._device)
        # float16 is used on "mps", bfloat16 everywhere else.
        self._type = torch.float16 if self._device == "mps" else torch.bfloat16

        # specify the path to the model
        self._vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(
            self.model_path
        )
        self._tokenizer = self._vl_chat_processor.tokenizer

        vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
            self.model_path, trust_remote_code=True, device_map=self._device
        )
        self._model = vl_gpt.to(self._type).eval()

    @staticmethod
    def _message_content_to_deepseek(
        content,
    ) -> Tuple[str, List[Union[str, BytesIO]]]:
        """Split an OpenAI-style message content into text and images.

        Args:
            content: Either a plain string, or a list of part dicts whose
                ``"type"`` is ``"text"`` or ``"image_url"``. Parts of any
                other type are ignored.

        Returns:
            A ``(text, images)`` tuple. ``text`` is the concatenation of all
            text parts, prefixed with ``"<image_placeholder>"`` when at least
            one image is present. ``images`` holds, in input order, a local
            path for images that already exist on disk (including decoded
            ``data:`` URLs) or a ``BytesIO`` with the downloaded bytes.

        Raises:
            Exception: If a non-``data:`` image URL is longer than 2048
                characters.
            requests.RequestException: If downloading a remote image fails.
        """

        def _ensure_url(_url):
            if _url.startswith("data:"):
                logger.info("Parse url by base64 decoder.")
                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
                # e.g. f"data:image/jpeg;base64,{base64_image}"
                # Split on the first comma so that extra header parameters
                # (e.g. "data:image/png;charset=utf-8;base64,...") do not
                # break the parsing.
                header, _, payload = _url.partition(",")
                media_type = header[len("data:") :].split(";", 1)[0]
                ext = media_type.rsplit("/", 1)[-1]
                data = base64.b64decode(payload.encode("utf-8"))

                # NOTE: delete=False means the file outlives this call;
                # nothing in this class removes it afterwards.
                with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as f:
                    f.write(data)
                logger.info("Dump base64 data to %s", f.name)
                return f.name
            else:
                if len(_url) > 2048:
                    raise Exception(f"Image url is too long, {len(_url)} > 2048.")

                return _url

        def _download(_images):
            # One slot per input image so that results keep the input order.
            local_images: List[Any] = [None] * len(_images)

            # To make requests.get works
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
            }

            def _fetch(_url):
                # The URL is passed as an argument (not read from the
                # enclosing loop variable) so every worker fetches its own
                # image.
                response = requests.get(_url, headers=headers, timeout=60)
                response.raise_for_status()
                return BytesIO(response.content)

            pending = {}
            with ThreadPoolExecutor() as executor:
                for index, url in enumerate(_images):
                    try:
                        if os.path.exists(url):
                            local_images[index] = url
                            continue
                    except Exception as e:
                        logger.debug("Image is remote: %s, e: %s", url, e)
                    pending[index] = executor.submit(_fetch, url)

            # Future.result() re-raises any download error instead of
            # silently leaving a None placeholder in the list.
            for index, future in pending.items():
                local_images[index] = future.result()
            return local_images

        if not isinstance(content, str):
            # TODO(codingl2k1): Optimize _ensure_url

            images = []
            new_content = []
            for c in content:
                c_type = c.get("type")
                if c_type == "image_url":
                    images.append(_ensure_url(c["image_url"]["url"]))
                elif c_type == "text":
                    new_content.append(c["text"])
            if images:
                new_content.insert(0, "<image_placeholder>")
                images = _download(images)
            return "".join(new_content), images
        return content, []

    def chat(
        self,
        prompt: Union[str, List[Dict]],
        system_prompt: Optional[str] = None,
        chat_history: Optional[List[ChatCompletionMessage]] = None,
        generate_config: Optional[PytorchGenerateConfig] = None,
    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
        """Generate a single, non-streaming reply to ``prompt``.

        Args:
            prompt: The new user message, as a string or a list of
                ``text`` / ``image_url`` parts.
            system_prompt: Accepted for interface compatibility; not used by
                this implementation.
            chat_history: Earlier OpenAI-style messages. Only ``user`` and
                ``assistant`` roles are forwarded; others are logged and
                dropped.
            generate_config: Only the ``"stream"`` flag is inspected; the
                sampling parameters are fixed in this method.

        Returns:
            A ``ChatCompletion`` with one choice. Token usage is reported as
            ``-1`` because it is not computed here.

        Raises:
            Exception: If streaming is requested.
        """
        if generate_config and generate_config.get("stream"):
            raise Exception(
                f"Chat with model {self.model_family.model_name} does not support stream."
            )
        prompt, images = self._message_content_to_deepseek(prompt)
        prompt_messages: List[Dict[str, Any]] = [
            {
                "role": "User",
                "content": prompt,
            },
            # Empty assistant turn for the processor to complete.
            {"role": "Assistant", "content": ""},
        ]
        if images:
            prompt_messages[0]["images"] = images

        # Convert openai history to deepseek vl history
        deepseek_history = []
        for h in chat_history or []:
            role = h["role"]
            if role == "user":
                content, history_images = self._message_content_to_deepseek(
                    h["content"]
                )
                msg: Dict[str, Any] = {
                    "role": "User",
                    "content": content,
                }
                if history_images:
                    msg["images"] = history_images
                deepseek_history.append(msg)
            elif role == "assistant":
                deepseek_history.append({"role": "Assistant", "content": h["content"]})
            else:
                logger.error("Unexpected msg in chat history: %s", h)

        deepseek_history.extend(prompt_messages)

        from ....thirdparty.deepseek_vl.utils.io import load_pil_images

        # load images and prepare for inputs
        pil_images = load_pil_images(deepseek_history)
        prepare_inputs = self._vl_chat_processor(
            conversations=deepseek_history, images=pil_images, force_batchify=True
        ).to(self._model.device, self._model.dtype)

        # run image encoder to get the image embeddings
        inputs_embeds = self._model.prepare_inputs_embeds(**prepare_inputs)

        # run the model to get the response
        outputs = self._model.language_model.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=prepare_inputs.attention_mask,
            pad_token_id=self._tokenizer.eos_token_id,
            bos_token_id=self._tokenizer.bos_token_id,
            eos_token_id=self._tokenizer.eos_token_id,
            max_new_tokens=512,
            do_sample=True,
            top_p=0.95,
            temperature=0.2,
            repetition_penalty=1.1,
            use_cache=True,
        )

        answer = self._tokenizer.decode(
            outputs[0].cpu().tolist(), skip_special_tokens=True
        )

        return ChatCompletion(
            id="chat" + str(uuid.uuid1()),
            object="chat.completion",
            created=int(time.time()),
            model=self.model_uid,
            choices=[
                ChatCompletionChoice(
                    index=0,
                    message={"role": "assistant", "content": answer},
                    finish_reason="stop",
                )
            ],
            usage=CompletionUsage(
                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
            ),
        )
@@ -118,6 +118,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
118
118
 
119
119
  def _stream_generator():
120
120
  last_chunk_text_length = 0
121
+ chunk_id = "chat-" + str(uuid.uuid1())
121
122
  for chunk_text, _ in self._model.stream_chat(
122
123
  self._tokenizer, prompt, input_history, **kwargs
123
124
  ):
@@ -127,7 +128,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
127
128
  text=chunk_text, index=0, logprobs=None, finish_reason=None
128
129
  )
129
130
  yield CompletionChunk(
130
- id=str(uuid.uuid1()),
131
+ id=chunk_id,
131
132
  object="text_completion",
132
133
  created=int(time.time()),
133
134
  model=self.model_uid,
@@ -0,0 +1,153 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import base64
15
+ import json
16
+ import logging
17
+ import operator
18
+ import tempfile
19
+ import time
20
+ import uuid
21
+ from typing import Dict, Iterator, List, Optional, Tuple, Union
22
+
23
+ from ....thirdparty.omnilmm.chat import OmniLMMChat, img2base64
24
+ from ....types import (
25
+ ChatCompletion,
26
+ ChatCompletionChoice,
27
+ ChatCompletionChunk,
28
+ ChatCompletionMessage,
29
+ CompletionUsage,
30
+ )
31
+ from ...utils import select_device
32
+ from ..llm_family import LLMFamilyV1, LLMSpecV1
33
+ from .core import PytorchChatModel, PytorchGenerateConfig
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class OmniLMMModel(PytorchChatModel):
    """Chat wrapper around an OmniLMM vision-language checkpoint.

    :meth:`chat` turns OpenAI-style messages into the single-image,
    JSON-encoded conversation that ``OmniLMMChat.chat`` is called with and
    returns a non-streaming ``ChatCompletion``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Populated by load().
        self._model = None

    @classmethod
    def match(
        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
    ) -> bool:
        """Return True when the family name contains ``"OmniLMM"``."""
        return "OmniLMM" in model_family.model_name

    def load(self):
        """Instantiate ``OmniLMMChat`` from ``self.model_path`` on the selected device."""
        device = self._pytorch_model_config.get("device", "auto")
        device = select_device(device)

        self._model = OmniLMMChat(self.model_path, device_map=device)

    def _message_content_to_OmniLMM(
        self, content
    ) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
        """Split an OpenAI-style message content into images and other parts.

        Args:
            content: Either a plain string, or a list of part dicts carrying
                a ``"type"`` key.

        Returns:
            An ``(images, other_content)`` tuple. ``images`` is a list of
            ``{"image": <path or url>, "type": "image"}`` dicts built from
            the ``image_url`` parts, in input order. ``other_content`` holds
            every remaining part, sorted by its ``"type"``. A plain string
            yields ``([], [{"type": "text", "text": content}])``.

        Raises:
            Exception: If a non-``data:`` image URL is longer than 2048
                characters.
        """

        def _ensure_url(_url):
            if _url.startswith("data:"):
                logger.info("Parse url by base64 decoder.")
                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
                # e.g. f"data:image/jpeg;base64,{base64_image}"
                # Split on the first comma so that extra header parameters
                # (e.g. "data:image/png;charset=utf-8;base64,...") do not
                # break the parsing.
                header, _, payload = _url.partition(",")
                media_type = header[len("data:") :].split(";", 1)[0]
                ext = media_type.rsplit("/", 1)[-1]
                data = base64.b64decode(payload.encode("utf-8"))

                # NOTE: delete=False means the file outlives this call;
                # nothing in this class removes it afterwards.
                with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as f:
                    f.write(data)
                logger.info("Dump base64 data to %s", f.name)
                return f.name
            else:
                if len(_url) > 2048:
                    raise Exception(f"Image url is too long, {len(_url)} > 2048.")
                return _url

        if not isinstance(content, str):
            images = []
            other_content = []

            for c in content:
                if c.get("type") == "image_url":
                    images.append(
                        {"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
                    )
                else:
                    other_content.append(c)

            # Every image dict has type "image", so sorting them by type
            # would be a no-op; only the remaining parts need ordering.
            other_content = sorted(other_content, key=operator.itemgetter("type"))

            return images, other_content
        return [], [{"type": "text", "text": content}]

    @staticmethod
    def _first_text(parts: List[Dict[str, str]]) -> str:
        """Return the text of the first part in ``parts``.

        Raises:
            ValueError: If ``parts`` is empty or its first part has no
                ``"text"`` entry, i.e. the message carries no usable text.
        """
        if not parts or "text" not in parts[0]:
            raise ValueError("Message content must contain a text part.")
        return parts[0]["text"]

    def chat(
        self,
        prompt: Union[str, List[Dict]],
        system_prompt: Optional[str] = None,
        chat_history: Optional[List[ChatCompletionMessage]] = None,
        generate_config: Optional[PytorchGenerateConfig] = None,
    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
        """Generate a single, non-streaming reply to ``prompt``.

        Args:
            prompt: The new user message, as a string or a list of parts.
            system_prompt: Accepted for interface compatibility; not used by
                this implementation.
            chat_history: Earlier OpenAI-style messages. Only complete
                user/assistant pairs are forwarded to the model.
            generate_config: Only the ``"stream"`` flag is inspected.

        Returns:
            A ``ChatCompletion`` with one choice. Token usage is reported as
            ``-1`` because it is not computed here.

        Raises:
            Exception: If streaming is requested.
            ValueError: If no image is found in the prompt or the history,
                or if a forwarded message has no text part.
        """
        if generate_config and generate_config.get("stream"):
            raise Exception(
                f"Chat with model {self.model_family.model_name} does not support stream."
            )
        image_first, prompt = self._message_content_to_OmniLMM(prompt)

        msgs = []
        query_to_response: List[Dict] = []
        image_another = []
        for h in chat_history or []:
            role = h["role"]
            image_tmp, content = self._message_content_to_OmniLMM(h["content"])
            if image_tmp:
                # The most recent history message with an image wins.
                image_another = image_tmp
            # Collect a user turn followed by an assistant turn; only
            # completed pairs are appended to msgs.
            if len(query_to_response) == 0 and role == "user":
                query_to_response.append(
                    {"role": "user", "content": self._first_text(content)}
                )
            if len(query_to_response) == 1 and role == "assistant":
                query_to_response.append(
                    {"role": "assistant", "content": self._first_text(content)}
                )
            if len(query_to_response) == 2:
                msgs.extend(query_to_response)
                query_to_response = []

        # NOTE(review): an image found in the history takes precedence over
        # one attached to the current prompt -- confirm this is intended.
        image = image_another or image_first
        if not image:
            # Previously this path failed with UnboundLocalError.
            raise ValueError(
                "An image is required in the prompt or the chat history."
            )
        im_64 = img2base64(image[0]["image"])
        msgs.append({"role": "user", "content": self._first_text(prompt)})
        model_input = {
            "image": im_64,
            "question": json.dumps(msgs, ensure_ascii=True),
        }
        answer = self._model.chat(input=model_input)

        return ChatCompletion(
            id="chat" + str(uuid.uuid1()),
            object="chat.completion",
            created=int(time.time()),
            model=self.model_uid,
            choices=[
                ChatCompletionChoice(
                    index=0,
                    message={"role": "assistant", "content": answer},
                    finish_reason="stop",
                )
            ],
            usage=CompletionUsage(
                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
            ),
        )
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-2024 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.