xinference 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
- xinference/_version.py +3 -3
- xinference/core/gradio.py +10 -7
- xinference/core/model.py +2 -4
- xinference/core/restful_api.py +7 -3
- xinference/core/service.py +1 -0
- xinference/deploy/supervisor.py +25 -5
- xinference/deploy/worker.py +4 -0
- xinference/locale/utils.py +2 -1
- xinference/locale/zh_CN.json +25 -0
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/METADATA +50 -36
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/RECORD +15 -14
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/LICENSE +0 -0
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/WHEEL +0 -0
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2023-07-
+ "date": "2023-07-11T21:48:28+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.0.1"
+ "full-revisionid": "12ed8a3a876dea13fbd61644f49fc49622c2eb26",
+ "version": "0.0.3"
 }
 ''' # END VERSION_JSON

xinference/core/gradio.py
CHANGED
@@ -27,7 +27,9 @@ if TYPE_CHECKING:
     from ..types import ChatCompletionChunk, ChatCompletionMessage

 MODEL_TO_FAMILIES = dict(
-    (model_family.model_name, model_family)
+    (model_family.model_name, model_family)
+    for model_family in MODEL_FAMILIES
+    if model_family.model_name != "baichuan"
 )

@@ -36,7 +38,7 @@ class GradioApp:
         self,
         supervisor_address: str,
         gladiator_num: int = 2,
-        max_model_num: int =
+        max_model_num: int = 3,
         use_launched_model: bool = False,
     ):
         self._api = SyncSupervisorAPI(supervisor_address)
@@ -193,7 +195,7 @@ class GradioApp:
         with gr.Column():
             with gr.Row():
                 model_name = gr.Dropdown(
-                    choices=
+                    choices=list(MODEL_TO_FAMILIES.keys()),
                     label=self._locale("model name"),
                     scale=2,
                 )
@@ -311,10 +313,11 @@ class GradioApp:
             _model_size_in_billions: str,
             _quantization: str,
         ):
-
-
-
-
+            full_name = "-".join(
+                [_model_name, _model_size_in_billions, _model_format, _quantization]
+            )
+            return str(uuid.uuid4()), gr.Chatbot.update(
+                label=full_name,
                 value=[],
             )
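
The `MODEL_TO_FAMILIES` change above builds the name-to-family lookup from a generator expression and filters out `baichuan` so it no longer appears in the Gradio dropdown. A minimal sketch of that construction; the `ModelFamily` dataclass and the family list are stand-ins, not xinference's real definitions:

```python
from dataclasses import dataclass


@dataclass
class ModelFamily:
    # Stand-in for xinference's real model family class.
    model_name: str


# Illustrative entries only.
MODEL_FAMILIES = [
    ModelFamily("baichuan"),
    ModelFamily("vicuna-v1.3"),
    ModelFamily("orca"),
]

# dict() consumes (key, value) pairs from the generator; the `if` clause
# drops families that the UI should not offer.
MODEL_TO_FAMILIES = dict(
    (family.model_name, family)
    for family in MODEL_FAMILIES
    if family.model_name != "baichuan"
)

print(list(MODEL_TO_FAMILIES))  # ['vicuna-v1.3', 'orca']
```
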
xinference/core/model.py
CHANGED
@@ -77,10 +77,8 @@ class ModelActor(xo.Actor):
         return ret

     async def generate(self, prompt: str, *args, **kwargs):
-        logger.warning("Generate, self address: %s", self.address)
-
         if not hasattr(self._model, "generate"):
-            raise AttributeError("generate")
+            raise AttributeError(f"Model {self._model.model_spec} is not for generate.")

         return self._wrap_generator(
             getattr(self._model, "generate")(prompt, *args, **kwargs)
@@ -88,7 +86,7 @@ class ModelActor(xo.Actor):

     async def chat(self, prompt: str, *args, **kwargs):
         if not hasattr(self._model, "chat"):
-            raise AttributeError("chat")
+            raise AttributeError(f"Model {self._model.model_spec} is not for chat.")

         return self._wrap_generator(
             getattr(self._model, "chat")(prompt, *args, **kwargs)
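
Both `raise` changes above follow one pattern: probe the wrapped model for a capability with `hasattr` and fail with an error that names the model. A small sketch of that guard, using a hypothetical generate-only model (the class and spec string are illustrative, not xinference's):

```python
class GenerateOnlyModel:
    # Illustrative spec string; the real value comes from the model's spec object.
    model_spec = "baichuan-7b-ggmlv3-q4_0"

    def generate(self, prompt: str) -> str:
        return f"completion for: {prompt}"


model = GenerateOnlyModel()

# The capability guard: reject chat calls on models that only support generate,
# with an error message that identifies the offending model.
try:
    if not hasattr(model, "chat"):
        raise AttributeError(f"Model {model.model_spec} is not for chat.")
except AttributeError as e:
    print(e)  # -> Model baichuan-7b-ggmlv3-q4_0 is not for chat.
```
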
xinference/core/restful_api.py
CHANGED
@@ -262,9 +262,13 @@ class RESTfulAPIActor(xo.Actor):
         # run uvicorn in another daemon thread.
         config = Config(app=app, log_level="critical")
         server = Server(config)
-
-
-
+
+        def _serve():
+            httpx_logger = logging.getLogger("httpx")
+            httpx_logger.setLevel(logging.CRITICAL)
+            server.run(self._sockets)
+
+        server_thread = threading.Thread(target=_serve, daemon=True)
         server_thread.start()

     async def list_models(self) -> Dict[str, Dict[str, Any]]:
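
The change above moves uvicorn's blocking `run()` into a daemon thread so the actor's event loop stays free. A minimal, self-contained sketch of the same pattern; the FastAPI app, host, and port here are assumptions for the demo, and unlike the real code (which hands pre-bound sockets to `server.run(self._sockets)`), this version lets uvicorn bind its own socket:

```python
import logging
import threading
import time

import uvicorn
from fastapi import FastAPI

app = FastAPI()  # stand-in app; the real one exposes xinference's REST routes


@app.get("/health")
def health():
    return {"status": "ok"}


config = uvicorn.Config(app=app, host="127.0.0.1", port=8000, log_level="critical")
server = uvicorn.Server(config)


def _serve():
    # Quiet httpx before serving, mirroring the change above.
    logging.getLogger("httpx").setLevel(logging.CRITICAL)
    server.run()  # blocks this thread; with no sockets passed, uvicorn binds host/port


server_thread = threading.Thread(target=_serve, daemon=True)
server_thread.start()
time.sleep(1)  # keep the demo alive briefly; the daemon thread dies with the program
```
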
xinference/core/service.py
CHANGED
@@ -175,6 +175,7 @@ class SupervisorActor(xo.Actor):

         worker_ref = await xo.actor_ref(address=worker_address, uid=WorkerActor.uid())
         self._worker_address_to_worker[worker_address] = worker_ref
+        logger.info("Worker %s has been added successfully", worker_address)

     async def report_worker_status(
         self, worker_address: str, status: Dict[str, ResourceStatus]
xinference/deploy/supervisor.py
CHANGED
@@ -18,7 +18,9 @@ import socket
 from typing import Dict, Optional

 import xoscar as xo
+from xoscar.utils import get_next_port

+from ..constants import XINFERENCE_DEFAULT_ENDPOINT_PORT
 from ..core.gradio import GradioApp
 from ..core.restful_api import RESTfulAPIActor
 from ..core.service import SupervisorActor
@@ -30,10 +32,28 @@ async def start_supervisor_components(address: str, host: str, port: int):
     await xo.create_actor(SupervisorActor, address=address, uid=SupervisorActor.uid())
     gradio_block = GradioApp(address).build()
     # create a socket for RESTful API
-
-
-
-
+    try:
+        sockets = []
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind((host, port))
+        sockets.append(sock)
+    except OSError:
+        # compare the reference to differentiate between the case where the user
+        # specifies the default port and the case where the user does not specify a port.
+        if port is XINFERENCE_DEFAULT_ENDPOINT_PORT:
+            while True:
+                try:
+                    sockets = []
+                    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+                    port = get_next_port()
+                    sock.bind((host, port))
+                    sockets.append(sock)
+                    break
+                except OSError:
+                    logger.warning("Failed to create socket with port %d", port)
+        else:
+            raise
+
     restful_actor = await xo.create_actor(
         RESTfulAPIActor,
         address=address,
@@ -43,7 +63,7 @@ async def start_supervisor_components(address: str, host: str, port: int):
     )
     await restful_actor.serve()
     url = f"http://{host}:{port}"
-    logger.info(f"
+    logger.info(f"Xinference successfully started. Endpoint: {url}")
     return url
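
The logic above binds the RESTful API socket and falls back to another port only when the user kept the default: it uses `port is XINFERENCE_DEFAULT_ENDPOINT_PORT` (an identity check) to tell the two cases apart and draws new candidates from `xoscar.utils.get_next_port`. A standard-library-only sketch of the same idea; `DEFAULT_PORT` is a stand-in constant, the identity check is simplified to equality, and the OS picks the fallback port:

```python
import socket

DEFAULT_PORT = 9997  # stand-in for XINFERENCE_DEFAULT_ENDPOINT_PORT


def bind_restful_socket(host: str, port: int) -> socket.socket:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((host, port))
        return sock
    except OSError:
        sock.close()
        if port != DEFAULT_PORT:
            # The user explicitly asked for this port: surface the failure.
            raise
        # Default port taken: retry with an OS-assigned free port.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind((host, 0))
        return sock


sock = bind_restful_socket("127.0.0.1", DEFAULT_PORT)
print("RESTful API socket bound to", sock.getsockname())
sock.close()
```
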
xinference/deploy/worker.py
CHANGED
@@ -13,12 +13,15 @@
 # limitations under the License.

 import asyncio
+import logging
 from typing import Dict, Optional

 import xoscar as xo

 from ..core.service import WorkerActor

+logger = logging.getLogger(__name__)
+

 async def start_worker_components(address: str, supervisor_address: str):
     actor_pool_config = await xo.get_pool_config(address)
@@ -35,6 +38,7 @@ async def start_worker_components(address: str, supervisor_address: str):
         supervisor_address=supervisor_address,
         subpool_addresses=subpool_addresses, # exclude the main actor pool.
     )
+    logger.info(f"Xinference worker successfully started.")


 async def _start_worker(
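
The worker change is just wiring up a module-level logger so startup can be reported. A minimal sketch of the pattern; the message mirrors the one added above, and the `basicConfig` call is an assumption for a runnable demo (the real CLI configures logging elsewhere):

```python
import logging

# getLogger(__name__) namespaces records under the importing module's name
# ("xinference.deploy.worker" in the real package).
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logger.info("Xinference worker successfully started.")
```
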
xinference/locale/utils.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import codecs
 import json
 import locale
 import os
@@ -27,7 +28,7 @@ class Locale:
             os.path.dirname(os.path.abspath(__file__)), f"{self._language}.json"
         )
         if os.path.exists(json_path):
-            self._mapping = json.load(open(json_path))
+            self._mapping = json.load(codecs.open(json_path, "r", encoding="utf-8"))
         else:
             self._mapping = None
xinference/locale/zh_CN.json
ADDED
@@ -0,0 +1,25 @@
+{
+    "Please create model first": "请先创建模型",
+    "stop reason": "停止原因",
+    "Show stop reason": "展示停止原因",
+    "Max tokens": "最大 token 数量",
+    "The maximum number of tokens to generate.": "生成 token 数量最大值",
+    "Temperature": "温度参数",
+    "The temperature to use for sampling.": "温度参数用于调整输出的多样性,数值越高多样性越高",
+    "Top P": "Top P",
+    "The top-p value to use for sampling.": "用于控制生成文本的确定性,数值越低确定性越高",
+    "Window size": "窗口大小",
+    "Window size of chat history.": "用于生成回复的聊天历史窗口大小",
+    "show stop reason": "展示停止原因",
+    "Downloading": "下载中",
+    "model name": "模型名",
+    "model format": "模型格式",
+    "model size in billions": "模型大小(B)",
+    "quantization": "模型量化方式",
+    "Parameters": "参数调整",
+    "create": "创建",
+    "select model": "选择模型",
+    "Arena": "角斗场",
+    "Chat": "聊天",
+    "Input": "输入"
+}
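
This new file is what the `codecs.open(..., encoding="utf-8")` change in xinference/locale/utils.py exists for: `json.load(open(path))` decodes with the platform's preferred encoding, which can garble or reject UTF-8 Chinese text (e.g. under a cp936/GBK default on Windows). A small sketch with a throwaway file standing in for zh_CN.json:

```python
import json

# Write a throwaway UTF-8 file standing in for zh_CN.json.
with open("zh_CN_demo.json", "w", encoding="utf-8") as f:
    f.write('{"Chat": "聊天"}')

# Explicit UTF-8 on read, equivalent to codecs.open(path, "r", encoding="utf-8"):
with open("zh_CN_demo.json", "r", encoding="utf-8") as f:
    mapping = json.load(f)

print(mapping["Chat"])  # -> 聊天
```
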
{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.0.1
+Version: 0.0.3
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -26,6 +26,12 @@ Requires-Dist: click
 Requires-Dist: tqdm
 Requires-Dist: tabulate
 Requires-Dist: requests
+Requires-Dist: pydantic
+Requires-Dist: fastapi
+Requires-Dist: uvicorn
+Provides-Extra: all
+Requires-Dist: chatglm-cpp ; extra == 'all'
+Requires-Dist: llama-cpp-python ; extra == 'all'
 Provides-Extra: dev
 Requires-Dist: cython (>=0.29) ; extra == 'dev'
 Requires-Dist: pytest (>=3.5.0) ; extra == 'dev'
@@ -37,12 +43,12 @@ Requires-Dist: flake8 (>=3.8.0) ; extra == 'dev'
 Requires-Dist: black ; extra == 'dev'

 [](https://pypi.org/project/xinference/)
-[](https://github.com/xorbitsai/inference/blob/main/LICENSE)
 [](https://actions-badge.atrox.dev/xorbitsai/inference/goto?ref=main)
 [](https://join.slack.com/t/xorbitsio/shared_invite/zt-1o3z9ucdh-RbfhbPVpx7prOVdM1CAuxg)
 [](https://twitter.com/xorbitsio)

-# Xorbits
+# Xorbits Inference: Model Serving Made Easy 🤖

 Welcome to the Xorbits Inference GitHub repository!

@@ -57,10 +63,16 @@ which is specifically designed to enable large models and high performance on co
 We are actively working on expanding Xorbits Inference's support to include additional runtimes,
 including PyTorch and JAX, in the near future.

+
+
+<div align="center">
+<i><a href="https://join.slack.com/t/xorbitsio/shared_invite/zt-1z3zsm9ep-87yI9YZ_B79HLB2ccTq4WA">👉 Join our Slack community!</a></i>
+</div>
+
 ## Key Features
 🌟 **Model Serving Made Easy**: Inference simplifies the process of serving large language, speech
-recognition, and multimodal models.
-for experimentation and production.
+recognition, and multimodal models. You can set up and deploy your models
+for experimentation and production with a single command.

 ⚡️ **State-of-the-Art Models**: Experiment with cutting-edge built-in models using a single
 command. Inference provides access to state-of-the-art open-source models!
@@ -78,43 +90,44 @@ for seamless management and monitoring.
 allowing the seamless distribution of model inference across multiple devices or machines. It
 leverages distributed computing techniques to parallelize and scale the inference process.

-🔌 **Built-in Integration with Third-Party Libraries**: Xorbits Inference
-
+🔌 **Built-in Integration with Third-Party Libraries**: Xorbits Inference seamlessly integrates
+with popular third-party libraries like LangChain and LlamaIndex. (Coming soon)

 ## Getting Started
 Xinference can be installed via pip from PyPI. It is highly recommended to create a new virtual
 environment to avoid conflicts.
 ```bash
-$ pip install xinference
+$ pip install "xinference[all]"
 ```
+"xinference[all]" installs all the necessary packages for serving models. If you want to achieve acceleration on
+different hardware, refer to the installation documentation of the corresponding package.
+- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-from-pypi-recommended) is required to run `baichuan`, `wizardlm-v1.0`, `vicuna-v1.3` and `orca`.
+- [chatglm-cpp-python](https://github.com/li-plus/chatglm.cpp#getting-started) is required to run `chatglm` and `chatglm2`.
+

 ### Deployment
-
+You can deploy Xinference locally with a single command or deploy it in a distributed cluster.

+#### Local
+To start a local instance of Xinference, run the following command:
 ```bash
-$ xinference
-    -p,--port 9997 \
-    --log-level INFO
+$ xinference
 ```

-
+#### Distributed
+
+To deploy Xinference in a cluster, you need to start a Xinference supervisor on one server and
 Xinference workers on the other servers. Follow the steps below:

-
-On the server where you want to run the Xinference supervisor, run the following command:
+**Starting the Supervisor**: On the server where you want to run the Xinference supervisor, run the following command:
 ```bash
-$ xinference-supervisor -H
-    -p,--port 9997 \
-    --log-level INFO
+$ xinference-supervisor -H "${supervisor_host}"
 ```
 Replace `${supervisor_host}` with the actual host of your supervisor server.

-
-On each of the other servers where you want to run Xinference workers, run the following command:
+**Starting the Workers**: On each of the other servers where you want to run Xinference workers, run the following command:
 ```bash
-$ xinference-worker -e
-    -H,--host "0.0.0.0" \
-    --log-level INFO
+$ xinference-worker -e "http://${supervisor_host}:9997"
 ```

 Once Xinference is running, an endpoint will be accessible for model management via CLI or
@@ -124,7 +137,7 @@ Xinference client.
 - For cluster deployment, the endpoint will be `http://${supervisor_host}:9997`, where
 `${supervisor_host}` is the hostname or IP address of the server where the supervisor is running.

-You can also view a web UI using the Xinference endpoint
+You can also view a web UI using the Xinference endpoint to chat with all the
 builtin models. You can even **chat with two cutting-edge AI models side-by-side to compare
 their performance**!

@@ -192,26 +205,27 @@ To view the builtin models, run the following command:
 $ xinference list --all
 ```

-| Name | Format
-| --------------------
-| baichuan |
-|
-|
-|
-|
-|
+| Name          | Type             | Language | Format | Size (in billions) | Quantization                           |
+| ------------- | ---------------- | -------- | ------ | ------------------ | -------------------------------------- |
+| baichuan      | Foundation Model | en, zh   | ggmlv3 | 7                  | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| chatglm       | SFT Model        | en, zh   | ggmlv3 | 6                  | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |
+| chatglm2      | SFT Model        | en, zh   | ggmlv3 | 6                  | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |
+| wizardlm-v1.0 | SFT Model        | en       | ggmlv3 | 7, 13, 33          | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| vicuna-v1.3   | SFT Model        | en       | ggmlv3 | 7, 13              | 'q2_K', 'q3_K_L', ... , 'q6_K', 'q8_0' |
+| orca          | SFT Model        | en       | ggmlv3 | 3, 7, 13           | 'q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0' |

 **NOTE**:
-- 
-- 
-- 
+- Xinference will download models automatically for you, and by default the models will be saved under `${USER}/.xinference/cache`.
+- Foundation models only provide interface `generate`.
+- SFT models provide both `generate` and `chat`.

 ## Roadmap
 Xinference is currently under active development. Here's a roadmap outlining our planned
 developments for the next few weeks:

 ### PyTorch Support
-With PyTorch integration, users will be able to seamlessly utilize PyTorch models
+With PyTorch integration, users will be able to seamlessly utilize PyTorch models from Hugging Face
 within Xinference.

 ### Langchain & LlamaIndex integration

{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 xinference/__init__.py,sha256=m4EiujOGORjfbl5YcYoxDmplbYfgXN3Li-xgqDZfW-w,905
-xinference/_version.py,sha256=
+xinference/_version.py,sha256=MjZnPWmITWtAxkvUPhFnXKOOzCmWCy0jlj9vmeFpuVk,497
 xinference/client.py,sha256=ERs_I9wWGnKLvzNaQw2F2d2dbgnAKs893mgvcRoXPhw,7048
 xinference/conftest.py,sha256=qFQx4Urx0q4S6cmze57s3FJenX5vJwUe7Jsg38f1FY4,1381
 xinference/constants.py,sha256=2z_oIhW03wiWJnn8KAdsIizpFX7H9NGJaGh2FrWCdvw,923
@@ -7,20 +7,21 @@ xinference/isolation.py,sha256=NstVRcO3dG4umHExICXAHlzVKwH8ch8MBwKwE-KFkE0,1826
 xinference/types.py,sha256=GVR5trtdJ_cZabfXaK8P2SRIdKyZKtc2mV8NqTkiIic,2616
 xinference/core/__init__.py,sha256=Fe5tYCHDbYJ7PhxJhQ68VbfgKgOsAuslNPr4wPhFMJM,612
 xinference/core/api.py,sha256=tYuweUadKvX9ZBxSd1piUnw0da3h2vOgxi_tBarUDmY,4616
-xinference/core/gradio.py,sha256=
-xinference/core/model.py,sha256=
+xinference/core/gradio.py,sha256=ovjRVMm7OA5JHlBKzBka68EB_LBWK6gt-v570vVxbSk,15761
+xinference/core/model.py,sha256=o3Emh62xlXAqB7rQS6-pAnbUlV0MOiOVjhwuu5JyK2Y,3232
 xinference/core/resource.py,sha256=784fXHDc3Qj96b04xS7gPs_b-209yHQdMliiip59RRs,1500
-xinference/core/restful_api.py,sha256=
-xinference/core/service.py,sha256=
+xinference/core/restful_api.py,sha256=MB9oBqQE2cKy05A3pCdE-QQix96oZdA1gRk84HfpJxI,15058
+xinference/core/service.py,sha256=PY1j_iXm8AiKygTWo4L_vodFJJHNMBjjKIwAdW-cnLE,12142
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/cmdline.py,sha256=llZ86qHI8Df6MVqMX8tBEUsvINe8NADFVL2pVzbZKf8,6024
 xinference/deploy/local.py,sha256=2WYXI4vR8maTERCm6QwZFUBs9ZbBU6BCzfmKJCxZdZY,1728
-xinference/deploy/supervisor.py,sha256=
+xinference/deploy/supervisor.py,sha256=qvAG2acHQ1h-j-vXyS8Yk5WvDZWRO7JlTdcxU_LjEhQ,3195
 xinference/deploy/utils.py,sha256=wH3qwI-sfEOhNb3cBWrmZ3NlRTkDlpTmeT7ueCtuYcI,2169
-xinference/deploy/worker.py,sha256=
+xinference/deploy/worker.py,sha256=dxfutvd44de_CXXpL-7GtBHkYwk9s8E7qxkOjtqdl5Y,2218
 xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/locale/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/locale/utils.py,sha256=
+xinference/locale/utils.py,sha256=w-G1DAJGw1UUQVVtq6khOZn7ZjobUmTw6qwHMm2eWIs,1312
+xinference/locale/zh_CN.json,sha256=hxvSiZ4OeAUuZeE75IrwFRgbQbt-MSCU9OC_JHvVnHg,1027
 xinference/model/__init__.py,sha256=myv0n9wrM1tA6hNo_w6CZlwaxPJElG_VjgySFUTPuOg,6104
 xinference/model/llm/__init__.py,sha256=2E2KxH_f3L4bkFY8ZGifVNDQhpXl70q3gLGS0EnOKZs,5575
 xinference/model/llm/chatglm.py,sha256=vc7RB6nP4TTVTlJjyFs7bRWJMIeo8rlNfWyXaDzESTo,5823
@@ -28,9 +29,9 @@ xinference/model/llm/core.py,sha256=dXTE5FgpgJm-Lwu9CulC7Cpy9XV5Z57GbbMQF9uhDNc,
 xinference/model/llm/orca.py,sha256=cxWXxToLoTqndRZ6Fp2Od-zDRXGDW034_SozTED_Bik,1517
 xinference/model/llm/vicuna.py,sha256=peUm575H4p_AhcAr1BCc30S8xp-jAcHpwCrM9gseWqw,2436
 xinference/model/llm/wizardlm.py,sha256=7KJrqBROJs4BzQVMozhkFk4el1yC7nJ4k6SUvJeC77A,1420
-xinference-0.0.1.dist-info/LICENSE,sha256=
-xinference-0.0.1.dist-info/METADATA,sha256=
-xinference-0.0.1.dist-info/WHEEL,sha256=
-xinference-0.0.1.dist-info/entry_points.txt,sha256=
-xinference-0.0.1.dist-info/top_level.txt,sha256=
-xinference-0.0.1.dist-info/RECORD,,
+xinference-0.0.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.0.3.dist-info/METADATA,sha256=ozdjTGDXIrPRNDPse1WFv0Mfdi43lAnXrcdA_upN82s,10690
+xinference-0.0.3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+xinference-0.0.3.dist-info/entry_points.txt,sha256=fJEruRdhXyPmHXYOGmoTmWvF0vO-nGdOBeX8xxDy8q8,175
+xinference-0.0.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.0.3.dist-info/RECORD,,

{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/LICENSE
File without changes
{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/WHEEL
File without changes
{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/entry_points.txt
File without changes
{xinference-0.0.1.dist-info → xinference-0.0.3.dist-info}/top_level.txt
File without changes