xinference 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of xinference has been flagged as potentially problematic.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-03-15T11:03:04+0800",
+ "date": "2024-03-21T14:58:01+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "60f098c78adb7e28dd564278a7741b56eaf062d8",
- "version": "0.9.3"
+ "full-revisionid": "2c9465ade7f358d57d4bc087277882d896a8de15",
+ "version": "0.9.4"
 }
 ''' # END VERSION_JSON
 
@@ -59,6 +59,7 @@ from ..core.utils import json_dumps
 from ..types import (
     SPECIAL_TOOL_PROMPT,
     ChatCompletion,
+    ChatCompletionMessage,
     Completion,
     CreateChatCompletion,
     CreateCompletion,
@@ -1258,25 +1259,21 @@ class RESTfulAPI:
                 status_code=400, detail="Invalid input. Please specify the prompt."
             )
 
-        system_messages = []
+        system_messages: List["ChatCompletionMessage"] = []
+        system_messages_contents = []
         non_system_messages = []
         for msg in messages:
             assert (
                 msg.get("content") != SPECIAL_TOOL_PROMPT
             ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
             if msg["role"] == "system":
-                system_messages.append(msg)
+                system_messages_contents.append(msg["content"])
             else:
                 non_system_messages.append(msg)
+        system_messages.append(
+            {"role": "system", "content": ". ".join(system_messages_contents)}
+        )
 
-        if len(system_messages) > 1:
-            raise HTTPException(
-                status_code=400, detail="Multiple system messages are not supported."
-            )
-        if len(system_messages) == 1 and messages[0]["role"] != "system":
-            raise HTTPException(
-                status_code=400, detail="System message should be the first one."
-            )
         assert non_system_messages
 
         has_tool_message = messages[-1].get("role") == "tool"
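With this change the REST layer stops rejecting requests that carry more than one system message; instead it concatenates their contents (joined with ". ") into a single system message and keeps the remaining messages in order. A minimal standalone sketch of the merging behaviour (hypothetical helper, not the actual xinference code path):

```python
from typing import Dict, List


def merge_system_messages(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
    # Collect all system contents, join them, and put the merged message first.
    system_contents = [m["content"] for m in messages if m["role"] == "system"]
    others = [m for m in messages if m["role"] != "system"]
    return [{"role": "system", "content": ". ".join(system_contents)}] + others


print(merge_system_messages([
    {"role": "system", "content": "You are helpful"},
    {"role": "system", "content": "Answer in English"},
    {"role": "user", "content": "Hi"},
]))
# [{'role': 'system', 'content': 'You are helpful. Answer in English'},
#  {'role': 'user', 'content': 'Hi'}]
```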
xinference/constants.py CHANGED
@@ -25,6 +25,7 @@ XINFERENCE_ENV_HEALTH_CHECK_INTERVAL = "XINFERENCE_HEALTH_CHECK_INTERVAL"
 XINFERENCE_ENV_HEALTH_CHECK_TIMEOUT = "XINFERENCE_HEALTH_CHECK_TIMEOUT"
 XINFERENCE_ENV_DISABLE_HEALTH_CHECK = "XINFERENCE_DISABLE_HEALTH_CHECK"
 XINFERENCE_ENV_DISABLE_VLLM = "XINFERENCE_DISABLE_VLLM"
+XINFERENCE_ENV_ENABLE_SGLANG = "XINFERENCE_ENABLE_SGLANG"
 
 
 def get_xinference_home() -> str:
@@ -64,3 +65,4 @@ XINFERENCE_DISABLE_HEALTH_CHECK = bool(
     int(os.environ.get(XINFERENCE_ENV_DISABLE_HEALTH_CHECK, 0))
 )
 XINFERENCE_DISABLE_VLLM = bool(int(os.environ.get(XINFERENCE_ENV_DISABLE_VLLM, 0)))
+XINFERENCE_ENABLE_SGLANG = bool(int(os.environ.get(XINFERENCE_ENV_ENABLE_SGLANG, 0)))
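The new SGLang backend is opt-in: it is only considered when `XINFERENCE_ENABLE_SGLANG` is set, and the flag is parsed with the same `bool(int(...))` pattern as the other switches, so it expects an integer-like value. A small sketch of how the flag resolves (illustrative only):

```python
import os

os.environ["XINFERENCE_ENABLE_SGLANG"] = "1"  # "1" enables, "0" or unset disables
enabled = bool(int(os.environ.get("XINFERENCE_ENABLE_SGLANG", 0)))
print(enabled)  # True

# A value such as "true" would raise ValueError in int() rather than enable the flag.
```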
@@ -60,6 +60,7 @@ def _install():
     from .pytorch.qwen_vl import QwenVLChatModel
     from .pytorch.vicuna import VicunaPytorchChatModel
     from .pytorch.yi_vl import YiVLChatModel
+    from .sglang.core import SGLANGChatModel, SGLANGModel
     from .vllm.core import VLLMChatModel, VLLMModel
 
     # register llm classes.
@@ -79,6 +80,7 @@ def _install():
             CtransformersModel,
         ]
     )
+    LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
    LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
     LLM_CLASSES.extend(
         [
@@ -98,6 +98,72 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 8194,
+        "model_name": "codeshell",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "WisdomShell/CodeShell-7B",
+                "model_revision": "1c79ab7fd316a62ab41d764facd3548a23fa5dee"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 8194,
+        "model_name": "codeshell-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "WisdomShell/CodeShell-7B-Chat",
+                "model_revision": "3cb06f589b7b1e2f8e728c77280b1114191d24de"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CodeShell",
+            "system_prompt": "",
+            "roles": [
+                "## human:",
+                "## assistant: "
+            ],
+            "intra_message_sep": "",
+            "inter_message_sep": "",
+            "stop_token_ids": [
+                70000
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "|||",
+                "|<end>|"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
@@ -573,7 +639,7 @@
                 64797,
                 2
             ],
-            "stop":[
+            "stop": [
                 "<|user|>",
                 "<|observation|>"
             ]
@@ -616,7 +682,7 @@
                 64797,
                 2
             ],
-            "stop":[
+            "stop": [
                 "<|user|>",
                 "<|observation|>"
             ]
@@ -667,7 +733,6 @@
             ]
         }
     },
-
     {
         "version": 1,
         "context_length": 2048,
@@ -715,8 +780,7 @@
                 "model_revision": "7f1b7394f74c630f50612a19ba90bd021c373989"
             }
         ]
-    }
-    ,
+    },
     {
         "version": 1,
         "context_length": 4096,
@@ -1606,7 +1670,10 @@
                 "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
                 "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
                 "quantization_parts": {
-                    "q4_k_m": ["a", "b"]
+                    "q4_k_m": [
+                        "a",
+                        "b"
+                    ]
                 }
             }
         ],
@@ -2658,7 +2725,11 @@
         "context_length": 32768,
         "model_name": "mixtral-v0.1",
         "model_lang": [
-            "en", "fr", "it", "de", "es"
+            "en",
+            "fr",
+            "it",
+            "de",
+            "es"
         ],
         "model_ability": [
             "generate"
@@ -2699,7 +2770,11 @@
         "context_length": 32768,
         "model_name": "mixtral-instruct-v0.1",
         "model_lang": [
-            "en", "fr", "it", "de", "es"
+            "en",
+            "fr",
+            "it",
+            "de",
+            "es"
         ],
         "model_ability": [
             "chat"
@@ -3275,10 +3350,8 @@
             ],
             "intra_message_sep": "\n",
            "inter_message_sep": "\n",
-            "stop_token_ids": [
-            ],
-            "stop": [
-            ]
+            "stop_token_ids": [],
+            "stop": []
         }
     },
     {
@@ -3376,7 +3449,8 @@
         "context_length": 4096,
         "model_name": "deepseek-coder-instruct",
         "model_lang": [
-            "en", "zh"
+            "en",
+            "zh"
         ],
         "model_ability": [
             "chat"
@@ -338,7 +338,7 @@
                 64797,
                 2
             ],
-            "stop":[
+            "stop": [
                 "<|user|>",
                 "<|observation|>"
             ]
@@ -382,13 +382,12 @@
                 64797,
                 2
             ],
-            "stop":[
+            "stop": [
                 "<|user|>",
                 "<|observation|>"
             ]
         }
     },
-
     {
         "version": 1,
         "context_length": 2048,
@@ -728,6 +727,74 @@
             }
         ]
     },
+    {
+        "version": 1,
+        "context_length": 8194,
+        "model_name": "codeshell",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "WisdomShell/CodeShell-7B",
+                "model_revision": "master",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 8194,
+        "model_name": "codeshell-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "WisdomShell/CodeShell-7B-Chat",
+                "model_revision": "master",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CodeShell",
+            "system_prompt": "",
+            "roles": [
+                "## human:",
+                "## assistant: "
+            ],
+            "intra_message_sep": "",
+            "inter_message_sep": "",
+            "stop_token_ids": [
+                70000
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "|||",
+                "|<end>|"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 100000,
@@ -970,7 +1037,11 @@
         "context_length": 32768,
         "model_name": "mixtral-v0.1",
         "model_lang": [
-            "en", "fr", "it", "de", "es"
+            "en",
+            "fr",
+            "it",
+            "de",
+            "es"
         ],
         "model_ability": [
             "generate"
@@ -996,7 +1067,11 @@
         "context_length": 32768,
         "model_name": "mixtral-instruct-v0.1",
         "model_lang": [
-            "en", "fr", "it", "de", "es"
+            "en",
+            "fr",
+            "it",
+            "de",
+            "es"
         ],
         "model_ability": [
             "chat"
@@ -1929,7 +2004,10 @@
                 "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
                 "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
                 "quantization_parts": {
-                    "q4_k_m": ["a", "b"]
+                    "q4_k_m": [
+                        "a",
+                        "b"
+                    ]
                 }
             }
         ],
@@ -2008,7 +2086,8 @@
         "context_length": 4096,
         "model_name": "deepseek-coder-instruct",
         "model_lang": [
-            "en", "zh"
+            "en",
+            "zh"
         ],
         "model_ability": [
             "chat"
@@ -148,6 +148,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
 
         def _stream_generator():
             last_chunk_text_length = 0
+            chunk_id = "chat-" + str(uuid.uuid1())
             for chunk_text, _ in self._model.stream_chat(
                 self._tokenizer, prompt, chat_history, **kwargs
             ):
@@ -157,7 +158,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
                     text=chunk_text, index=0, logprobs=None, finish_reason=None
                 )
                 yield CompletionChunk(
-                    id=str(uuid.uuid1()),
+                    id=chunk_id,
                     object="text_completion",
                     created=int(time.time()),
                     model=self.model_uid,
@@ -118,6 +118,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
 
         def _stream_generator():
             last_chunk_text_length = 0
+            chunk_id = "chat-" + str(uuid.uuid1())
             for chunk_text, _ in self._model.stream_chat(
                 self._tokenizer, prompt, input_history, **kwargs
             ):
@@ -127,7 +128,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
                     text=chunk_text, index=0, logprobs=None, finish_reason=None
                 )
                 yield CompletionChunk(
-                    id=str(uuid.uuid1()),
+                    id=chunk_id,
                     object="text_completion",
                     created=int(time.time()),
                     model=self.model_uid,
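Both the ChatGLM and InternLM2 streaming paths now create one id per completion and reuse it for every streamed chunk, matching the convention that all chunks of a single streamed completion share the same id. A toy sketch of the effect (hypothetical generator, not the model code):

```python
import uuid


def stream_chunks(pieces):
    chunk_id = "chat-" + str(uuid.uuid1())  # one id for the whole stream
    for text in pieces:
        yield {"id": chunk_id, "object": "text_completion", "text": text}


chunks = list(stream_chunks(["Hel", "lo", "!"]))
assert len({c["id"] for c in chunks}) == 1  # previously every chunk got a fresh uuid
```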
@@ -0,0 +1,13 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,365 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import time
+import uuid
+from typing import AsyncGenerator, Dict, List, Optional, TypedDict, Union
+
+from ....constants import XINFERENCE_ENABLE_SGLANG
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    Completion,
+    CompletionChoice,
+    CompletionChunk,
+    CompletionUsage,
+)
+from .. import LLM, LLMFamilyV1, LLMSpecV1
+from ..llm_family import CustomLLMFamilyV1
+from ..utils import ChatModelMixin
+
+logger = logging.getLogger(__name__)
+
+
+class SGLANGModelConfig(TypedDict, total=False):
+    tokenizer_mode: str
+    trust_remote_code: bool
+    tp_size: int
+    mem_fraction_static: float
+    log_level: str
+    attention_reduce_in_fp32: bool  # For gemma
+
+
+class SGLANGGenerateConfig(TypedDict, total=False):
+    presence_penalty: float
+    frequency_penalty: float
+    temperature: float
+    top_p: float
+    top_k: int
+    max_new_tokens: int
+    stop: Optional[Union[str, List[str]]]
+    ignore_eos: bool
+    stream: bool
+
+
+try:
+    import sglang  # noqa: F401
+
+    SGLANG_INSTALLED = True
+except ImportError:
+    SGLANG_INSTALLED = False
+
+SGLANG_SUPPORTED_MODELS = ["llama-2", "mistral-v0.1", "mixtral-v0.1"]
+SGLANG_SUPPORTED_CHAT_MODELS = [
+    "llama-2-chat",
+    "qwen-chat",
+    "qwen1.5-chat",
+    "mistral-instruct-v0.1",
+    "mistral-instruct-v0.2",
+    "mixtral-instruct-v0.1",
+    "gemma-it",
+]
+
+
+class SGLANGModel(LLM):
+    def __init__(
+        self,
+        model_uid: str,
+        model_family: "LLMFamilyV1",
+        model_spec: "LLMSpecV1",
+        quantization: str,
+        model_path: str,
+        model_config: Optional[SGLANGModelConfig],
+    ):
+        super().__init__(model_uid, model_family, model_spec, quantization, model_path)
+        self._model_config = model_config
+        self._engine = None
+
+    def load(self):
+        try:
+            import sglang as sgl
+        except ImportError:
+            error_message = "Failed to import module 'sglang'"
+            installation_guide = [
+                "Please make sure 'sglang' is installed. ",
+                "You can install it by `pip install 'sglang[all]'`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        self._model_config = self._sanitize_model_config(self._model_config)
+        logger.info(
+            f"Loading {self.model_uid} with following model config: {self._model_config}"
+        )
+
+        self._engine = sgl.Runtime(
+            model_path=self.model_path,
+            tokenizer_path=self.model_path,
+            **self._model_config,
+        )
+
+    def _sanitize_model_config(
+        self, model_config: Optional[SGLANGModelConfig]
+    ) -> SGLANGModelConfig:
+        if model_config is None:
+            model_config = SGLANGModelConfig()
+
+        cuda_count = self._get_cuda_count()
+        model_config.setdefault("tokenizer_mode", "auto")
+        model_config.setdefault("trust_remote_code", True)
+        model_config.setdefault("tp_size", cuda_count)
+        # See https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py#L37
+        mem_fraction_static = model_config.pop("mem_fraction_static", None)
+        if mem_fraction_static is None:
+            tp_size = model_config.get("tp_size", cuda_count)
+            if tp_size >= 8:
+                model_config["mem_fraction_static"] = 0.80
+            elif tp_size >= 4:
+                model_config["mem_fraction_static"] = 0.82
+            elif tp_size >= 2:
+                model_config["mem_fraction_static"] = 0.85
+            else:
+                model_config["mem_fraction_static"] = 0.90
+        model_config.setdefault("log_level", "info")
+        model_config.setdefault("attention_reduce_in_fp32", False)
+
+        return model_config
+
+    @staticmethod
+    def _sanitize_generate_config(
+        generate_config: Optional[SGLANGGenerateConfig] = None,
+    ) -> SGLANGGenerateConfig:
+        if generate_config is None:
+            generate_config = SGLANGGenerateConfig()
+
+        generate_config.setdefault("presence_penalty", 0.0)
+        generate_config.setdefault("frequency_penalty", 0.0)
+        generate_config.setdefault("temperature", 1.0)
+        generate_config.setdefault("top_p", 1.0)
+        generate_config.setdefault("top_k", -1)
+        # See https://github.com/sgl-project/sglang/blob/main/python/sglang/lang/ir.py#L120
+        # 16 is too less, so here set 256 by default
+        generate_config.setdefault(
+            "max_new_tokens", generate_config.pop("max_tokens", 256)  # type: ignore
+        )
+        generate_config.setdefault("stop", [])
+        generate_config.setdefault("stream", False)
+        generate_config.setdefault("ignore_eos", False)
+
+        return generate_config
+
+    @classmethod
+    def match(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not XINFERENCE_ENABLE_SGLANG:
+            return False
+        if not cls._has_cuda_device():
+            return False
+        if not cls._is_linux():
+            return False
+        if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
+            return False
+        if llm_spec.model_format == "pytorch":
+            if quantization != "none" and not (quantization is None):
+                return False
+        if llm_spec.model_format in ["gptq", "awq"]:
+            # Currently, only 4-bit weight quantization is supported for GPTQ, but got 8 bits.
+            if "4" not in quantization:
+                return False
+        if isinstance(llm_family, CustomLLMFamilyV1):
+            if llm_family.model_family not in SGLANG_SUPPORTED_MODELS:
+                return False
+        else:
+            if llm_family.model_name not in SGLANG_SUPPORTED_MODELS:
+                return False
+        if "generate" not in llm_family.model_ability:
+            return False
+        return SGLANG_INSTALLED
+
+    @staticmethod
+    def _convert_state_to_completion_chunk(
+        request_id: str, model: str, output_text: str, meta_info: Dict
+    ) -> CompletionChunk:
+        choices: List[CompletionChoice] = [
+            CompletionChoice(
+                text=output_text,
+                index=0,
+                logprobs=None,
+                finish_reason=None,
+            )
+        ]
+        chunk = CompletionChunk(
+            id=request_id,
+            object="text_completion",
+            created=int(time.time()),
+            model=model,
+            choices=choices,
+        )
+        prompt_tokens = meta_info["prompt_tokens"]
+        completion_tokens = meta_info["completion_tokens"]
+        chunk["usage"] = CompletionUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        return chunk
+
+    @staticmethod
+    def _convert_state_to_completion(
+        request_id: str, model: str, output_text: str, meta_info: Dict
+    ) -> Completion:
+        choices = [
+            CompletionChoice(
+                text=output_text,
+                index=0,
+                logprobs=None,
+                finish_reason=None,
+            )
+        ]
+
+        usage = CompletionUsage(
+            prompt_tokens=meta_info["prompt_tokens"],
+            completion_tokens=meta_info["completion_tokens"],
+            total_tokens=meta_info["prompt_tokens"] + meta_info["completion_tokens"],
+        )
+        return Completion(
+            id=request_id,
+            object="text_completion",
+            created=int(time.time()),
+            model=model,
+            choices=choices,
+            usage=usage,
+        )
+
+    async def async_generate(
+        self,
+        prompt: str,
+        generate_config: Optional[SGLANGGenerateConfig] = None,
+    ) -> Union[Completion, AsyncGenerator[CompletionChunk, None]]:
+        try:
+            import sglang as sgl
+            from sglang import assistant, gen, user
+        except ImportError:
+            error_message = "Failed to import module 'sglang'"
+            installation_guide = [
+                "Please make sure 'sglang' is installed. ",
+                "You can install it by `pip install sglang[all]`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+
+        @sgl.function
+        def pipeline(s, question):
+            s += user(question)
+            s += assistant(gen("answer"))
+
+        sanitized_generate_config = self._sanitize_generate_config(generate_config)
+        logger.debug(
+            "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
+        )
+        stream = sanitized_generate_config.pop("stream")
+        request_id = str(uuid.uuid1())
+        state = pipeline.run(
+            question=prompt,
+            backend=self._engine,
+            stream=stream,
+            **sanitized_generate_config,
+        )
+        if not stream:
+            return self._convert_state_to_completion(
+                request_id,
+                model=self.model_uid,
+                output_text=state["answer"],
+                meta_info=state.get_meta_info(name="answer"),
+            )
+        else:
+
+            async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
+                async for out, meta_info in state.text_async_iter(
+                    var_name="answer", return_meta_data=True
+                ):
+                    chunk = self._convert_state_to_completion_chunk(
+                        request_id, self.model_uid, output_text=out, meta_info=meta_info
+                    )
+                    yield chunk
+
+            return stream_results()
+
+
+class SGLANGChatModel(SGLANGModel, ChatModelMixin):
+    @classmethod
+    def match(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not XINFERENCE_ENABLE_SGLANG:
+            return False
+        if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
+            return False
+        if llm_spec.model_format == "pytorch":
+            if quantization != "none" and not (quantization is None):
+                return False
+        if llm_spec.model_format in ["gptq", "awq"]:
+            # Currently, only 4-bit weight quantization is supported for GPTQ, but got 8 bits.
+            if "4" not in quantization:
+                return False
+        if isinstance(llm_family, CustomLLMFamilyV1):
+            if llm_family.model_family not in SGLANG_SUPPORTED_CHAT_MODELS:
+                return False
+        else:
+            if llm_family.model_name not in SGLANG_SUPPORTED_CHAT_MODELS:
+                return False
+        if "chat" not in llm_family.model_ability:
+            return False
+        return SGLANG_INSTALLED
+
+    def _sanitize_chat_config(
+        self,
+        generate_config: Optional[Dict] = None,
+    ) -> Dict:
+        if not generate_config:
+            generate_config = {}
+        if self.model_family.prompt_style:
+            if (
+                not generate_config.get("stop")
+            ) and self.model_family.prompt_style.stop:
+                generate_config["stop"] = self.model_family.prompt_style.stop.copy()
+        return generate_config
+
+    async def async_chat(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        generate_config: Optional[Dict] = None,
+    ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
+        assert self.model_family.prompt_style is not None
+        prompt_style = self.model_family.prompt_style.copy()
+        if system_prompt:
+            prompt_style.system_prompt = system_prompt
+        chat_history = chat_history or []
+        full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
+
+        generate_config = self._sanitize_chat_config(generate_config)
+        stream = generate_config.get("stream", None)
+        if stream:
+            agen = await self.async_generate(full_prompt, generate_config)  # type: ignore
+            assert isinstance(agen, AsyncGenerator)
+            return self._async_to_chat_completion_chunks(agen)
+        else:
+            c = await self.async_generate(full_prompt, generate_config)  # type: ignore
+            assert not isinstance(c, AsyncGenerator)
+            return self._to_chat_completion(c)
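The new backend drives SGLang through its frontend-language API: a tiny `@sgl.function` program appends the user question and a generated assistant answer, and is executed against an `sgl.Runtime` created from the local model path. A condensed sketch of that pattern, assuming `sglang` is installed and using a placeholder model path:

```python
import sglang as sgl
from sglang import assistant, gen, user


@sgl.function
def pipeline(s, question):
    s += user(question)            # one user turn
    s += assistant(gen("answer"))  # generate the assistant answer into "answer"


runtime = sgl.Runtime(model_path="/path/to/model")  # placeholder path
state = pipeline.run(question="What is SGLang?", backend=runtime, stream=False)
print(state["answer"])                     # generated text
print(state.get_meta_info(name="answer"))  # token counts used to build CompletionUsage
```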
@@ -411,6 +411,16 @@ Begin!"""
                 if content:
                     ret += content + "<end_of_turn>\n"
             return ret
+        elif prompt_style.style_name == "CodeShell":
+            ret = ""
+            for message in chat_history:
+                content = message["content"]
+                role = get_role(message["role"])
+                if content:
+                    ret += f"{role}{content}|<end>|"
+                else:
+                    ret += f"{role}".rstrip()
+            return ret
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
 
@@ -451,6 +461,7 @@ Begin!"""
                     "index": i,
                     "delta": {
                         "role": "assistant",
+                        "content": "",
                     },
                     "finish_reason": None,
                 }
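The `CodeShell` branch added above builds the prompt by joining each history message as `{role}{content}|<end>|` and leaving an empty-content assistant turn open for generation. A rough standalone illustration of the resulting string (roles taken from the CodeShell prompt_style entries):

```python
def codeshell_prompt(history):
    # history items look like {"role": "## human:" or "## assistant: ", "content": str}
    ret = ""
    for message in history:
        role, content = message["role"], message["content"]
        ret += f"{role}{content}|<end>|" if content else role.rstrip()
    return ret


print(codeshell_prompt([
    {"role": "## human:", "content": "Write hello world in C"},
    {"role": "## assistant: ", "content": ""},  # empty content leaves the turn open
]))
# ## human:Write hello world in C|<end>|## assistant:
```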
@@ -535,26 +546,39 @@ Begin!"""
             # Refer to:
             # https://github.com/QwenLM/Qwen/blob/main/examples/react_prompt.md
             # https://github.com/QwenLM/Qwen/blob/main/openai_api.py#L297
-            func_name, func_args = "", ""
+            func_name, func_args, content = "", "", ""
             i = text.rfind("\nAction:")
             j = text.rfind("\nAction Input:")
             k = text.rfind("\nObservation:")
+            t = max(
+                text.rfind("\nThought:", 0, i), text.rfind("Thought:", 0, i)
+            )  # find the last thought just before Action, considering the Thought at the very beginning
             if 0 <= i < j:  # If the text has `Action` and `Action input`,
                 if k < j:  # but does not contain `Observation`,
                     # then it is likely that `Observation` is omitted by the LLM,
                     # because the output text may have discarded the stop word.
                     text = text.rstrip() + "\nObservation:"  # Add it back.
                     k = text.rfind("\nObservation:")
-            if 0 <= i < j < k:
+            if 0 <= t < i < j < k:
                 func_name = text[i + len("\nAction:") : j].strip()
                 func_args = text[j + len("\nAction Input:") : k].strip()
+                content = text[
+                    t + len("\nThought:") : i
+                ].strip()  # len("\nThought:") and len("Thought:") both are OK since there is a space after :
             if func_name:
-                return None, func_name, json.loads(func_args)
-            z = text.rfind("\nFinal Answer: ")
-            if z >= 0:
-                text = text[z + len("\nFinal Answer: ") :]
+                return content, func_name, json.loads(func_args)
         except Exception as e:
             logger.error("Eval tool calls completion failed: %s", e)
+        t = max(text.rfind("\nThought:"), text.rfind("Thought:"))
+        z = max(text.rfind("\nFinal Answer:"), text.rfind("Final Answer:"))
+        if z >= 0:
+            text = text[
+                z + len("\nFinal Answer:") :
+            ]  # len("\nFinal Answer::") and len("Final Answer::") both are OK since there is a space after :
+        else:
+            text = text[
+                t + len("\nThought:") :
+            ]  # There is only Thought: no Final Answer:
         return text, None, None
 
     @classmethod
@@ -573,13 +597,10 @@ Begin!"""
         )
         logger.debug("Tool call content: %s, func: %s, args: %s", content, func, args)
 
-        if content:
-            m = {"role": "assistant", "content": content, "tool_calls": []}
-            finish_reason = "stop"
-        else:
+        if func:
             m = {
                 "role": "assistant",
-                "content": None,
+                "content": content,
                 "tool_calls": [
                     {
                         "id": f"call_{_id}",
@@ -592,7 +613,9 @@ Begin!"""
                 ],
             }
             finish_reason = "tool_calls"
-
+        else:
+            m = {"role": "assistant", "content": content, "tool_calls": []}
+            finish_reason = "stop"
         return {
             "id": "chat" + f"cmpl-{_id}",
             "model": model_uid,
@@ -103,6 +103,13 @@ VLLM_SUPPORTED_CHAT_MODELS = [
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
+    VLLM_SUPPORTED_CHAT_MODELS.append("gemma-it")
+
+if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+    VLLM_SUPPORTED_CHAT_MODELS.append("orion-chat")
+    VLLM_SUPPORTED_CHAT_MODELS.append("orion-chat-rag")
+
 
 class VLLMModel(LLM):
     def __init__(
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.9.3
+Version: 0.9.4
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -62,6 +62,7 @@ Requires-Dist: orjson ; extra == 'all'
 Requires-Dist: optimum ; extra == 'all'
 Requires-Dist: auto-gptq ; (sys_platform != "darwin") and extra == 'all'
 Requires-Dist: vllm >=0.2.6 ; (sys_platform == "linux") and extra == 'all'
+Requires-Dist: sglang[all] ; (sys_platform == "linux") and extra == 'all'
 Provides-Extra: benchmark
 Requires-Dist: psutil ; extra == 'benchmark'
 Provides-Extra: dev
@@ -105,6 +106,8 @@ Requires-Dist: controlnet-aux ; extra == 'image'
 Provides-Extra: intel
 Requires-Dist: torch ==2.1.0a0 ; extra == 'intel'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'intel'
+Provides-Extra: sglang
+Requires-Dist: sglang[all] ; extra == 'sglang'
 Provides-Extra: transformers
 Requires-Dist: transformers >=4.34.1 ; extra == 'transformers'
 Requires-Dist: torch ; extra == 'transformers'
@@ -1,15 +1,15 @@
 xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
 xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
-xinference/_version.py,sha256=Lg7riCQT494Ylj5YFCWP1j41UqI3CrUvWJukfkpY7ck,497
+xinference/_version.py,sha256=NA8Gd_eyeZ4pGEW181mxcRG1Ae6xSp0HasvpDwAg4iQ,497
 xinference/conftest.py,sha256=soSjfLdFYvMleUaSL4cZKPtP1WdlAb7q3Sm2pX4aTog,9523
-xinference/constants.py,sha256=81M8ATbulsB78fht4pp0H8q2dDU2qTqSmu1FAjRiOKk,2738
+xinference/constants.py,sha256=Bu_fOJUGAvvqF_6FY5OzOHl7fQ1Nomek3LY17xr9oz4,2882
 xinference/device_utils.py,sha256=WNKDD4Eni3Io3AehiyonsuoJaukT77Bc76Es7vNGvjc,2615
 xinference/fields.py,sha256=BFAVowcvvwA4i1HHHeyD1J_p3p6Vi5hVmKY0CT21blM,5075
 xinference/isolation.py,sha256=NstVRcO3dG4umHExICXAHlzVKwH8ch8MBwKwE-KFkE0,1826
 xinference/types.py,sha256=_tbGgdMTlXs6oINWbEmCesyckDDa2tsPRH9yzM8fKSI,11812
 xinference/utils.py,sha256=Z6PPDGmX4EW8OD3OfA2Wa37ZM9OdRTnR00ITMDTu4qE,716
 xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/api/restful_api.py,sha256=pMpT7H46viyG1i1dez6Xqmsz2G-xzWDXmkZXK-d3sxY,58898
+xinference/api/restful_api.py,sha256=FaD0PJxBWJSh_cpOIsjMfUfCP5XPuhtQu_I48xSvYfE,58762
 xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/api/oauth2/auth_service.py,sha256=n88v85kkkM_4NYZBmbETM7OF7XuehWv9gyLRUmhVL1Q,4929
 xinference/api/oauth2/types.py,sha256=ltAkjPBEJVSvYVhzwjaJpe2eUbnqkx2vofLuRbcGMg4,972
@@ -65,12 +65,12 @@ xinference/model/image/model_spec_modelscope.json,sha256=KMI-2YOJoLUtN93LZiqqETW
 xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
 xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/image/stable_diffusion/core.py,sha256=YStAtcO-HVR36brvy7DK-D-Z-LT1X4tfwrVrXcLXK5Y,6010
-xinference/model/llm/__init__.py,sha256=j-e1XhrqQvizR4CU4AhURu29t4LA2qWP2hHNf7haj6Q,6417
+xinference/model/llm/__init__.py,sha256=vA1nVY-lDVB4AXuWxsvoevvrU526c4AmSt2UTjTT2pQ,6530
 xinference/model/llm/core.py,sha256=ubS7maXHr5tzTUey1D8ta49Ur1C5RMB-rnpAZNqZZ8c,9526
-xinference/model/llm/llm_family.json,sha256=D1910vsIuaY0O2t6GTAhQulcTfZIg9OriGss8l9cP4o,95056
+xinference/model/llm/llm_family.json,sha256=eGD_z3Z32a9TWh8qjCKyF6qpcjhu39ow5Kfx1zb_eyU,96692
 xinference/model/llm/llm_family.py,sha256=BoqfNX6DpIJylIOUYd1xpDybu1WyV94HMtdQRlRHVho,33447
-xinference/model/llm/llm_family_modelscope.json,sha256=aJgARoqfYKp5slOoGv-6kHi7Z-MuKiUAfBPJXStSGBo,59087
-xinference/model/llm/utils.py,sha256=Gh6CjrOnhQAZYrmRUvxLD_io3RLa-xyYQ5G34Lk7OQ0,25715
+xinference/model/llm/llm_family_modelscope.json,sha256=eGyEottnMKNwRsesOfF5tVof3jOGp4GxM6mu80uiQfg,60741
+xinference/model/llm/utils.py,sha256=js5lml_Ar8TUjAX1YWfhlfAYsFgAhPC35KEiLZbVzOE,26893
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/ggml/chatglm.py,sha256=Zrzw8K2EroI5v2JlwOAJ08tNFs871n86zRtBxuK97Z8,13044
 xinference/model/llm/ggml/ctransformers.py,sha256=n8dTItZe97cF79NkmsVPirqpBcrZiGAQfd2GRpz9-3I,9917
@@ -81,11 +81,11 @@ xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py,sha256=ULvaoAKGH-L6RuRLF
 xinference/model/llm/ggml/tools/gguf.py,sha256=Hv2haR-UN7NdB1N8YId32hFoEPd-JX6_aUNWRJhyJZc,30277
 xinference/model/llm/pytorch/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/pytorch/baichuan.py,sha256=gh49PNue47T4XUwE5bx_ErDwagb1dYVrhBFYXUsJDQ8,2795
-xinference/model/llm/pytorch/chatglm.py,sha256=jjoYRcZmFht9hXKh8zp6JGlY-jPJTJ5nuUc1nJAFfPs,7099
+xinference/model/llm/pytorch/chatglm.py,sha256=VF4-Kzw-THpedw0EEpnJoCJi4OgDmnlQbqKmVrwCxQI,7149
 xinference/model/llm/pytorch/compression.py,sha256=U0vMJ-JaBt4oC2LffgWg6HbPj1CeUi_YdwVbjDd0mRA,8112
 xinference/model/llm/pytorch/core.py,sha256=bmLyP2GWk4mfq7LP6z8-ME5-AC3KikU2BbglqKPXNck,19228
 xinference/model/llm/pytorch/falcon.py,sha256=Dp19-Sv4mbx1PAVMEp7biATqs4T1WdwcvZZhuVndmYQ,4482
-xinference/model/llm/pytorch/internlm2.py,sha256=WvXehL7iSoL9OB_WE_3omcFBJkwnuUHfWYnS0wWPRvo,5661
+xinference/model/llm/pytorch/internlm2.py,sha256=fW-iQxo2CCyZ7dODJPb6w_NoyCfGZj6zCZ_CI1Su_os,5707
 xinference/model/llm/pytorch/llama_2.py,sha256=RiS6chAj-nh_E-GOZPV-Ze1zDevGmJkhWZMdF4Etoq4,3693
 xinference/model/llm/pytorch/qwen_vl.py,sha256=jyE9em7sMTosYPLqW08aCuEc6c2zNKLJzIcFzowKjyk,5722
 xinference/model/llm/pytorch/spec_decoding_utils.py,sha256=brSxcXkXlELprGK2cLjNTW-3h1Lk9GlgcOFYZo5NBAA,18781
@@ -93,8 +93,10 @@ xinference/model/llm/pytorch/spec_model.py,sha256=8E0jZuXp9MlNLCqgvOjNdUeYqJdMZq
 xinference/model/llm/pytorch/utils.py,sha256=frFvbT2Ka4YRsfnx6ma8IW_i8eqqRwSo5pCU0qnOJtQ,17288
 xinference/model/llm/pytorch/vicuna.py,sha256=PjLD-5BF4MVXkPs1_WncCCuziDKl5tiGXHf-zkrBz_c,2309
 xinference/model/llm/pytorch/yi_vl.py,sha256=aJZQauLmyXEtnz57ZU-Z3pmi0dFdlR_UjnZu0QsC06c,9393
+xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
+xinference/model/llm/sglang/core.py,sha256=eqAczZfGJInC_jihXVeKiWQ79Llk3reHDBkdShQlH-0,12915
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=l6-oywSkDrnW8576xdtyqNiKmE6nopMFpzZRvFB988s,15693
+xinference/model/llm/vllm/core.py,sha256=f2hsH7UayhIan47QEif8XQvgD45u9ZMn5LzeWY42oag,15955
 xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
 xinference/model/rerank/core.py,sha256=A9NqcEFRzahE8fcRgRUfY8pHUm5jYG5THxI2qSKTjFY,7771
 xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917
@@ -15374,9 +15376,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.9.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-0.9.3.dist-info/METADATA,sha256=yklS-7FS330guT_xiz7gXER1FNjyKI7r2UTXMly4WEI,14129
-xinference-0.9.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-xinference-0.9.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-0.9.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-0.9.3.dist-info/RECORD,,
+xinference-0.9.4.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.9.4.dist-info/METADATA,sha256=n-FCXzg0OH6gsXeLERrjxAY47xVGBc3mJj5seynjvGw,14273
+xinference-0.9.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+xinference-0.9.4.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.9.4.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.9.4.dist-info/RECORD,,