xinference 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/conftest.py +0 -8
- xinference/constants.py +1 -0
- xinference/core/model.py +34 -2
- xinference/core/supervisor.py +5 -5
- xinference/core/utils.py +9 -10
- xinference/core/worker.py +5 -4
- xinference/deploy/cmdline.py +5 -0
- xinference/deploy/utils.py +7 -4
- xinference/model/audio/model_spec.json +1 -1
- xinference/model/llm/core.py +1 -3
- xinference/model/llm/llm_family.json +87 -0
- xinference/model/llm/llm_family_modelscope.json +91 -0
- xinference/model/llm/vllm/core.py +2 -1
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/METADATA +4 -3
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/RECORD +20 -20
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.2.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2024-11-
|
|
11
|
+
"date": "2024-11-07T16:55:36+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.16.
|
|
14
|
+
"full-revisionid": "85ab86bf1c0967e45fbec995534cd5a0c9a9c439",
|
|
15
|
+
"version": "0.16.3"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/conftest.py
CHANGED
|
@@ -58,10 +58,6 @@ TEST_LOGGING_CONF = {
|
|
|
58
58
|
"propagate": False,
|
|
59
59
|
}
|
|
60
60
|
},
|
|
61
|
-
"root": {
|
|
62
|
-
"level": "WARN",
|
|
63
|
-
"handlers": ["stream_handler"],
|
|
64
|
-
},
|
|
65
61
|
}
|
|
66
62
|
|
|
67
63
|
TEST_LOG_FILE_PATH = get_log_file(f"test_{get_timestamp_ms()}")
|
|
@@ -102,10 +98,6 @@ TEST_FILE_LOGGING_CONF = {
|
|
|
102
98
|
"propagate": False,
|
|
103
99
|
}
|
|
104
100
|
},
|
|
105
|
-
"root": {
|
|
106
|
-
"level": "WARN",
|
|
107
|
-
"handlers": ["stream_handler", "file_handler"],
|
|
108
|
-
},
|
|
109
101
|
}
|
|
110
102
|
|
|
111
103
|
|
xinference/constants.py
CHANGED
xinference/core/model.py
CHANGED
|
@@ -40,7 +40,10 @@ from typing import (
|
|
|
40
40
|
import sse_starlette.sse
|
|
41
41
|
import xoscar as xo
|
|
42
42
|
|
|
43
|
-
from ..constants import
|
|
43
|
+
from ..constants import (
|
|
44
|
+
XINFERENCE_LAUNCH_MODEL_RETRY,
|
|
45
|
+
XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
|
|
46
|
+
)
|
|
44
47
|
|
|
45
48
|
if TYPE_CHECKING:
|
|
46
49
|
from .progress_tracker import ProgressTrackerActor
|
|
@@ -134,6 +137,8 @@ def oom_check(fn):
|
|
|
134
137
|
|
|
135
138
|
|
|
136
139
|
class ModelActor(xo.StatelessActor):
|
|
140
|
+
_replica_model_uid: Optional[str]
|
|
141
|
+
|
|
137
142
|
@classmethod
|
|
138
143
|
def gen_uid(cls, model: "LLM"):
|
|
139
144
|
return f"{model.__class__}-model-actor"
|
|
@@ -192,6 +197,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
192
197
|
supervisor_address: str,
|
|
193
198
|
worker_address: str,
|
|
194
199
|
model: "LLM",
|
|
200
|
+
replica_model_uid: str,
|
|
195
201
|
model_description: Optional["ModelDescription"] = None,
|
|
196
202
|
request_limits: Optional[int] = None,
|
|
197
203
|
):
|
|
@@ -203,6 +209,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
203
209
|
|
|
204
210
|
self._supervisor_address = supervisor_address
|
|
205
211
|
self._worker_address = worker_address
|
|
212
|
+
self._replica_model_uid = replica_model_uid
|
|
206
213
|
self._model = model
|
|
207
214
|
self._model_description = (
|
|
208
215
|
model_description.to_dict() if model_description else {}
|
|
@@ -257,6 +264,9 @@ class ModelActor(xo.StatelessActor):
|
|
|
257
264
|
uid=FluxBatchSchedulerActor.gen_uid(self.model_uid()),
|
|
258
265
|
)
|
|
259
266
|
|
|
267
|
+
def __repr__(self) -> str:
|
|
268
|
+
return f"ModelActor({self._replica_model_uid})"
|
|
269
|
+
|
|
260
270
|
async def _record_completion_metrics(
|
|
261
271
|
self, duration, completion_tokens, prompt_tokens
|
|
262
272
|
):
|
|
@@ -374,7 +384,28 @@ class ModelActor(xo.StatelessActor):
|
|
|
374
384
|
return condition
|
|
375
385
|
|
|
376
386
|
async def load(self):
|
|
377
|
-
|
|
387
|
+
try:
|
|
388
|
+
# Change process title for model
|
|
389
|
+
import setproctitle
|
|
390
|
+
|
|
391
|
+
setproctitle.setproctitle(f"Model: {self._replica_model_uid}")
|
|
392
|
+
except ImportError:
|
|
393
|
+
pass
|
|
394
|
+
i = 0
|
|
395
|
+
while True:
|
|
396
|
+
i += 1
|
|
397
|
+
try:
|
|
398
|
+
self._model.load()
|
|
399
|
+
break
|
|
400
|
+
except Exception as e:
|
|
401
|
+
if (
|
|
402
|
+
i < XINFERENCE_LAUNCH_MODEL_RETRY
|
|
403
|
+
and str(e).find("busy or unavailable") >= 0
|
|
404
|
+
):
|
|
405
|
+
await asyncio.sleep(5)
|
|
406
|
+
logger.warning("Retry to load model {model_uid}: %d times", i)
|
|
407
|
+
continue
|
|
408
|
+
raise
|
|
378
409
|
if self.allow_batching():
|
|
379
410
|
await self._scheduler_ref.set_model(self._model)
|
|
380
411
|
logger.debug(
|
|
@@ -385,6 +416,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
385
416
|
logger.debug(
|
|
386
417
|
f"Batching enabled for model: {self.model_uid()}, max_num_images: {self._model.get_max_num_images_for_batching()}"
|
|
387
418
|
)
|
|
419
|
+
logger.info(f"{self} loaded")
|
|
388
420
|
|
|
389
421
|
def model_uid(self):
|
|
390
422
|
return (
|
xinference/core/supervisor.py
CHANGED
|
@@ -970,7 +970,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
970
970
|
raise ValueError(
|
|
971
971
|
f"Model is already in the model list, uid: {_replica_model_uid}"
|
|
972
972
|
)
|
|
973
|
-
replica_gpu_idx = assign_replica_gpu(_replica_model_uid, gpu_idx)
|
|
973
|
+
replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
|
|
974
974
|
nonlocal model_type
|
|
975
975
|
|
|
976
976
|
worker_ref = (
|
|
@@ -1084,7 +1084,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1084
1084
|
dead_models,
|
|
1085
1085
|
)
|
|
1086
1086
|
for replica_model_uid in dead_models:
|
|
1087
|
-
model_uid, _
|
|
1087
|
+
model_uid, _ = parse_replica_model_uid(replica_model_uid)
|
|
1088
1088
|
self._model_uid_to_replica_info.pop(model_uid, None)
|
|
1089
1089
|
self._replica_model_uid_to_worker.pop(
|
|
1090
1090
|
replica_model_uid, None
|
|
@@ -1137,7 +1137,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1137
1137
|
raise ValueError(f"Model not found in the model list, uid: {model_uid}")
|
|
1138
1138
|
|
|
1139
1139
|
replica_model_uid = build_replica_model_uid(
|
|
1140
|
-
model_uid,
|
|
1140
|
+
model_uid, next(replica_info.scheduler)
|
|
1141
1141
|
)
|
|
1142
1142
|
|
|
1143
1143
|
worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
|
|
@@ -1154,7 +1154,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1154
1154
|
raise ValueError(f"Model not found in the model list, uid: {model_uid}")
|
|
1155
1155
|
# Use rep id 0 to instead of next(replica_info.scheduler) to avoid
|
|
1156
1156
|
# consuming the generator.
|
|
1157
|
-
replica_model_uid = build_replica_model_uid(model_uid,
|
|
1157
|
+
replica_model_uid = build_replica_model_uid(model_uid, 0)
|
|
1158
1158
|
worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
|
|
1159
1159
|
if worker_ref is None:
|
|
1160
1160
|
raise ValueError(
|
|
@@ -1260,7 +1260,7 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
1260
1260
|
uids_to_remove.append(model_uid)
|
|
1261
1261
|
|
|
1262
1262
|
for replica_model_uid in uids_to_remove:
|
|
1263
|
-
model_uid, _
|
|
1263
|
+
model_uid, _ = parse_replica_model_uid(replica_model_uid)
|
|
1264
1264
|
self._model_uid_to_replica_info.pop(model_uid, None)
|
|
1265
1265
|
self._replica_model_uid_to_worker.pop(replica_model_uid, None)
|
|
1266
1266
|
|
xinference/core/utils.py
CHANGED
|
@@ -146,27 +146,26 @@ def iter_replica_model_uid(model_uid: str, replica: int) -> Generator[str, None,
|
|
|
146
146
|
"""
|
|
147
147
|
replica = int(replica)
|
|
148
148
|
for rep_id in range(replica):
|
|
149
|
-
yield f"{model_uid}-{
|
|
149
|
+
yield f"{model_uid}-{rep_id}"
|
|
150
150
|
|
|
151
151
|
|
|
152
|
-
def build_replica_model_uid(model_uid: str,
|
|
152
|
+
def build_replica_model_uid(model_uid: str, rep_id: int) -> str:
|
|
153
153
|
"""
|
|
154
154
|
Build a replica model uid.
|
|
155
155
|
"""
|
|
156
|
-
return f"{model_uid}-{
|
|
156
|
+
return f"{model_uid}-{rep_id}"
|
|
157
157
|
|
|
158
158
|
|
|
159
|
-
def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int
|
|
159
|
+
def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int]:
|
|
160
160
|
"""
|
|
161
|
-
Parse replica model uid to model uid
|
|
161
|
+
Parse replica model uid to model uid and rep id.
|
|
162
162
|
"""
|
|
163
163
|
parts = replica_model_uid.split("-")
|
|
164
164
|
if len(parts) == 1:
|
|
165
|
-
return replica_model_uid, -1
|
|
165
|
+
return replica_model_uid, -1
|
|
166
166
|
rep_id = int(parts.pop())
|
|
167
|
-
replica = int(parts.pop())
|
|
168
167
|
model_uid = "-".join(parts)
|
|
169
|
-
return model_uid,
|
|
168
|
+
return model_uid, rep_id
|
|
170
169
|
|
|
171
170
|
|
|
172
171
|
def is_valid_model_uid(model_uid: str) -> bool:
|
|
@@ -261,9 +260,9 @@ def get_nvidia_gpu_info() -> Dict:
|
|
|
261
260
|
|
|
262
261
|
|
|
263
262
|
def assign_replica_gpu(
|
|
264
|
-
_replica_model_uid: str, gpu_idx: Union[int, List[int]]
|
|
263
|
+
_replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
|
|
265
264
|
) -> List[int]:
|
|
266
|
-
model_uid,
|
|
265
|
+
model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
|
|
267
266
|
rep_id, replica = int(rep_id), int(replica)
|
|
268
267
|
if isinstance(gpu_idx, int):
|
|
269
268
|
gpu_idx = [gpu_idx]
|
xinference/core/worker.py
CHANGED
|
@@ -157,7 +157,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
157
157
|
model_uid,
|
|
158
158
|
recover_count - 1,
|
|
159
159
|
)
|
|
160
|
-
event_model_uid, _
|
|
160
|
+
event_model_uid, _ = parse_replica_model_uid(model_uid)
|
|
161
161
|
try:
|
|
162
162
|
if self._event_collector_ref is not None:
|
|
163
163
|
await self._event_collector_ref.report_event(
|
|
@@ -377,7 +377,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
377
377
|
return len(self._model_uid_to_model)
|
|
378
378
|
|
|
379
379
|
async def is_model_vllm_backend(self, model_uid: str) -> bool:
|
|
380
|
-
_model_uid, _
|
|
380
|
+
_model_uid, _ = parse_replica_model_uid(model_uid)
|
|
381
381
|
supervisor_ref = await self.get_supervisor_ref()
|
|
382
382
|
model_ref = await supervisor_ref.get_model(_model_uid)
|
|
383
383
|
return await model_ref.is_vllm_backend()
|
|
@@ -800,7 +800,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
800
800
|
launch_args.update(kwargs)
|
|
801
801
|
|
|
802
802
|
try:
|
|
803
|
-
origin_uid, _
|
|
803
|
+
origin_uid, _ = parse_replica_model_uid(model_uid)
|
|
804
804
|
except Exception as e:
|
|
805
805
|
logger.exception(e)
|
|
806
806
|
raise
|
|
@@ -889,6 +889,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
889
889
|
uid=model_uid,
|
|
890
890
|
supervisor_address=self._supervisor_address,
|
|
891
891
|
worker_address=self.address,
|
|
892
|
+
replica_model_uid=model_uid,
|
|
892
893
|
model=model,
|
|
893
894
|
model_description=model_description,
|
|
894
895
|
request_limits=request_limits,
|
|
@@ -926,7 +927,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
926
927
|
# Terminate model while its launching is not allow
|
|
927
928
|
if model_uid in self._model_uid_launching_guard:
|
|
928
929
|
raise ValueError(f"{model_uid} is launching")
|
|
929
|
-
origin_uid, _
|
|
930
|
+
origin_uid, _ = parse_replica_model_uid(model_uid)
|
|
930
931
|
try:
|
|
931
932
|
_ = await self.get_supervisor_ref()
|
|
932
933
|
if self._event_collector_ref is not None:
|
xinference/deploy/cmdline.py
CHANGED
|
@@ -43,6 +43,7 @@ from .utils import (
|
|
|
43
43
|
get_log_file,
|
|
44
44
|
get_timestamp_ms,
|
|
45
45
|
handle_click_args_type,
|
|
46
|
+
set_envs,
|
|
46
47
|
)
|
|
47
48
|
|
|
48
49
|
try:
|
|
@@ -106,6 +107,8 @@ def start_local_cluster(
|
|
|
106
107
|
XINFERENCE_LOG_MAX_BYTES,
|
|
107
108
|
)
|
|
108
109
|
logging.config.dictConfig(dict_config) # type: ignore
|
|
110
|
+
# refer to https://huggingface.co/docs/transformers/main_classes/logging
|
|
111
|
+
set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
|
|
109
112
|
|
|
110
113
|
main(
|
|
111
114
|
host=host,
|
|
@@ -280,6 +283,7 @@ def supervisor(
|
|
|
280
283
|
XINFERENCE_LOG_MAX_BYTES,
|
|
281
284
|
)
|
|
282
285
|
logging.config.dictConfig(dict_config) # type: ignore
|
|
286
|
+
set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
|
|
283
287
|
|
|
284
288
|
main(
|
|
285
289
|
host=host,
|
|
@@ -342,6 +346,7 @@ def worker(
|
|
|
342
346
|
XINFERENCE_LOG_MAX_BYTES,
|
|
343
347
|
)
|
|
344
348
|
logging.config.dictConfig(dict_config) # type: ignore
|
|
349
|
+
set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
|
|
345
350
|
|
|
346
351
|
endpoint = get_endpoint(endpoint)
|
|
347
352
|
|
xinference/deploy/utils.py
CHANGED
|
@@ -134,10 +134,6 @@ def get_config_dict(
|
|
|
134
134
|
"propagate": False,
|
|
135
135
|
},
|
|
136
136
|
},
|
|
137
|
-
"root": {
|
|
138
|
-
"level": "WARN",
|
|
139
|
-
"handlers": ["stream_handler", "file_handler"],
|
|
140
|
-
},
|
|
141
137
|
}
|
|
142
138
|
return config_dict
|
|
143
139
|
|
|
@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
|
|
|
220
216
|
pass
|
|
221
217
|
|
|
222
218
|
return arg
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def set_envs(key: str, value: str):
|
|
222
|
+
"""
|
|
223
|
+
Environment variables are set by the parent process and inherited by child processes
|
|
224
|
+
"""
|
|
225
|
+
os.environ[key] = value
|
|
@@ -127,7 +127,7 @@
|
|
|
127
127
|
"model_name": "ChatTTS",
|
|
128
128
|
"model_family": "ChatTTS",
|
|
129
129
|
"model_id": "2Noise/ChatTTS",
|
|
130
|
-
"model_revision": "
|
|
130
|
+
"model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
|
|
131
131
|
"model_ability": "text-to-audio",
|
|
132
132
|
"multilingual": true
|
|
133
133
|
},
|
xinference/model/llm/core.py
CHANGED
|
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
|
|
|
52
52
|
*args,
|
|
53
53
|
**kwargs,
|
|
54
54
|
):
|
|
55
|
-
self.model_uid, self.
|
|
56
|
-
replica_model_uid
|
|
57
|
-
)
|
|
55
|
+
self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
|
|
58
56
|
self.model_family = model_family
|
|
59
57
|
self.model_spec = model_spec
|
|
60
58
|
self.quantization = quantization
|
|
@@ -1312,6 +1312,93 @@
|
|
|
1312
1312
|
"<|eom_id|>"
|
|
1313
1313
|
]
|
|
1314
1314
|
},
|
|
1315
|
+
{
|
|
1316
|
+
"version": 1,
|
|
1317
|
+
"context_length": 131072,
|
|
1318
|
+
"model_name": "llama-3.2-vision-instruct",
|
|
1319
|
+
"model_lang": [
|
|
1320
|
+
"en",
|
|
1321
|
+
"de",
|
|
1322
|
+
"fr",
|
|
1323
|
+
"it",
|
|
1324
|
+
"pt",
|
|
1325
|
+
"hi",
|
|
1326
|
+
"es",
|
|
1327
|
+
"th"
|
|
1328
|
+
],
|
|
1329
|
+
"model_ability": [
|
|
1330
|
+
"chat",
|
|
1331
|
+
"vision"
|
|
1332
|
+
],
|
|
1333
|
+
"model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
1334
|
+
"model_specs": [
|
|
1335
|
+
{
|
|
1336
|
+
"model_format": "pytorch",
|
|
1337
|
+
"model_size_in_billions": 11,
|
|
1338
|
+
"quantizations": [
|
|
1339
|
+
"none"
|
|
1340
|
+
],
|
|
1341
|
+
"model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
|
1342
|
+
},
|
|
1343
|
+
{
|
|
1344
|
+
"model_format": "pytorch",
|
|
1345
|
+
"model_size_in_billions": 90,
|
|
1346
|
+
"quantizations": [
|
|
1347
|
+
"none"
|
|
1348
|
+
],
|
|
1349
|
+
"model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
|
|
1350
|
+
}
|
|
1351
|
+
],
|
|
1352
|
+
"chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
|
1353
|
+
"stop_token_ids": [
|
|
1354
|
+
128001,
|
|
1355
|
+
128008,
|
|
1356
|
+
128009
|
|
1357
|
+
],
|
|
1358
|
+
"stop": [
|
|
1359
|
+
"<|end_of_text|>",
|
|
1360
|
+
"<|eot_id|>",
|
|
1361
|
+
"<|eom_id|>"
|
|
1362
|
+
]
|
|
1363
|
+
},
|
|
1364
|
+
{
|
|
1365
|
+
"version": 1,
|
|
1366
|
+
"context_length": 131072,
|
|
1367
|
+
"model_name": "llama-3.2-vision",
|
|
1368
|
+
"model_lang": [
|
|
1369
|
+
"en",
|
|
1370
|
+
"de",
|
|
1371
|
+
"fr",
|
|
1372
|
+
"it",
|
|
1373
|
+
"pt",
|
|
1374
|
+
"hi",
|
|
1375
|
+
"es",
|
|
1376
|
+
"th"
|
|
1377
|
+
],
|
|
1378
|
+
"model_ability": [
|
|
1379
|
+
"generate",
|
|
1380
|
+
"vision"
|
|
1381
|
+
],
|
|
1382
|
+
"model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
1383
|
+
"model_specs": [
|
|
1384
|
+
{
|
|
1385
|
+
"model_format": "pytorch",
|
|
1386
|
+
"model_size_in_billions": 11,
|
|
1387
|
+
"quantizations": [
|
|
1388
|
+
"none"
|
|
1389
|
+
],
|
|
1390
|
+
"model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
|
|
1391
|
+
},
|
|
1392
|
+
{
|
|
1393
|
+
"model_format": "pytorch",
|
|
1394
|
+
"model_size_in_billions": 90,
|
|
1395
|
+
"quantizations": [
|
|
1396
|
+
"none"
|
|
1397
|
+
],
|
|
1398
|
+
"model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
|
|
1399
|
+
}
|
|
1400
|
+
]
|
|
1401
|
+
},
|
|
1315
1402
|
{
|
|
1316
1403
|
"version": 1,
|
|
1317
1404
|
"context_length": 2048,
|
|
@@ -363,6 +363,97 @@
|
|
|
363
363
|
"<|eom_id|>"
|
|
364
364
|
]
|
|
365
365
|
},
|
|
366
|
+
{
|
|
367
|
+
"version": 1,
|
|
368
|
+
"context_length": 131072,
|
|
369
|
+
"model_name": "llama-3.2-vision-instruct",
|
|
370
|
+
"model_lang": [
|
|
371
|
+
"en",
|
|
372
|
+
"de",
|
|
373
|
+
"fr",
|
|
374
|
+
"it",
|
|
375
|
+
"pt",
|
|
376
|
+
"hi",
|
|
377
|
+
"es",
|
|
378
|
+
"th"
|
|
379
|
+
],
|
|
380
|
+
"model_ability": [
|
|
381
|
+
"chat",
|
|
382
|
+
"vision"
|
|
383
|
+
],
|
|
384
|
+
"model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
385
|
+
"model_specs": [
|
|
386
|
+
{
|
|
387
|
+
"model_format": "pytorch",
|
|
388
|
+
"model_size_in_billions": 11,
|
|
389
|
+
"quantizations": [
|
|
390
|
+
"none"
|
|
391
|
+
],
|
|
392
|
+
"model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
|
|
393
|
+
"model_hub": "modelscope"
|
|
394
|
+
},
|
|
395
|
+
{
|
|
396
|
+
"model_format": "pytorch",
|
|
397
|
+
"model_size_in_billions": 90,
|
|
398
|
+
"quantizations": [
|
|
399
|
+
"none"
|
|
400
|
+
],
|
|
401
|
+
"model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
|
|
402
|
+
"model_hub": "modelscope"
|
|
403
|
+
}
|
|
404
|
+
],
|
|
405
|
+
"chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
|
406
|
+
"stop_token_ids": [
|
|
407
|
+
128001,
|
|
408
|
+
128008,
|
|
409
|
+
128009
|
|
410
|
+
],
|
|
411
|
+
"stop": [
|
|
412
|
+
"<|end_of_text|>",
|
|
413
|
+
"<|eot_id|>",
|
|
414
|
+
"<|eom_id|>"
|
|
415
|
+
]
|
|
416
|
+
},
|
|
417
|
+
{
|
|
418
|
+
"version": 1,
|
|
419
|
+
"context_length": 131072,
|
|
420
|
+
"model_name": "llama-3.2-vision",
|
|
421
|
+
"model_lang": [
|
|
422
|
+
"en",
|
|
423
|
+
"de",
|
|
424
|
+
"fr",
|
|
425
|
+
"it",
|
|
426
|
+
"pt",
|
|
427
|
+
"hi",
|
|
428
|
+
"es",
|
|
429
|
+
"th"
|
|
430
|
+
],
|
|
431
|
+
"model_ability": [
|
|
432
|
+
"generate",
|
|
433
|
+
"vision"
|
|
434
|
+
],
|
|
435
|
+
"model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
|
|
436
|
+
"model_specs": [
|
|
437
|
+
{
|
|
438
|
+
"model_format": "pytorch",
|
|
439
|
+
"model_size_in_billions": 11,
|
|
440
|
+
"quantizations": [
|
|
441
|
+
"none"
|
|
442
|
+
],
|
|
443
|
+
"model_id": "LLM-Research/Llama-3.2-11B-Vision",
|
|
444
|
+
"model_hub": "modelscope"
|
|
445
|
+
},
|
|
446
|
+
{
|
|
447
|
+
"model_format": "pytorch",
|
|
448
|
+
"model_size_in_billions": 90,
|
|
449
|
+
"quantizations": [
|
|
450
|
+
"none"
|
|
451
|
+
],
|
|
452
|
+
"model_id": "LLM-Research/Llama-3.2-90B-Vision",
|
|
453
|
+
"model_hub": "modelscope"
|
|
454
|
+
}
|
|
455
|
+
]
|
|
456
|
+
},
|
|
366
457
|
{
|
|
367
458
|
"version": 1,
|
|
368
459
|
"context_length": 2048,
|
|
@@ -163,7 +163,6 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
|
|
|
163
163
|
VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
|
|
164
164
|
VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
|
|
165
165
|
|
|
166
|
-
|
|
167
166
|
if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
|
|
168
167
|
VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
|
|
169
168
|
VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
|
|
@@ -177,6 +176,8 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
|
|
|
177
176
|
VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
|
|
178
177
|
|
|
179
178
|
if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
|
|
179
|
+
VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
|
|
180
|
+
VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
|
|
180
181
|
VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
|
|
181
182
|
|
|
182
183
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: xinference
|
|
3
|
-
Version: 0.16.
|
|
3
|
+
Version: 0.16.3
|
|
4
4
|
Summary: Model Serving Made Easy
|
|
5
5
|
Home-page: https://github.com/xorbitsai/inference
|
|
6
6
|
Author: Qin Xuye
|
|
@@ -42,6 +42,7 @@ Requires-Dist: nvidia-ml-py
|
|
|
42
42
|
Requires-Dist: async-timeout
|
|
43
43
|
Requires-Dist: peft
|
|
44
44
|
Requires-Dist: timm
|
|
45
|
+
Requires-Dist: setproctitle
|
|
45
46
|
Provides-Extra: all
|
|
46
47
|
Requires-Dist: llama-cpp-python!=0.2.58,>=0.2.25; extra == "all"
|
|
47
48
|
Requires-Dist: transformers>=4.43.2; extra == "all"
|
|
@@ -71,7 +72,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "all"
|
|
|
71
72
|
Requires-Dist: librosa; extra == "all"
|
|
72
73
|
Requires-Dist: xxhash; extra == "all"
|
|
73
74
|
Requires-Dist: torchaudio; extra == "all"
|
|
74
|
-
Requires-Dist: ChatTTS>=0.2; extra == "all"
|
|
75
|
+
Requires-Dist: ChatTTS>=0.2.1; extra == "all"
|
|
75
76
|
Requires-Dist: lightning>=2.0.0; extra == "all"
|
|
76
77
|
Requires-Dist: hydra-core>=1.3.2; extra == "all"
|
|
77
78
|
Requires-Dist: inflect; extra == "all"
|
|
@@ -105,7 +106,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "audio"
|
|
|
105
106
|
Requires-Dist: librosa; extra == "audio"
|
|
106
107
|
Requires-Dist: xxhash; extra == "audio"
|
|
107
108
|
Requires-Dist: torchaudio; extra == "audio"
|
|
108
|
-
Requires-Dist: ChatTTS>=0.2; extra == "audio"
|
|
109
|
+
Requires-Dist: ChatTTS>=0.2.1; extra == "audio"
|
|
109
110
|
Requires-Dist: tiktoken; extra == "audio"
|
|
110
111
|
Requires-Dist: torch>=2.0.0; extra == "audio"
|
|
111
112
|
Requires-Dist: lightning>=2.0.0; extra == "audio"
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
|
|
2
2
|
xinference/_compat.py,sha256=xFztCfyrq3O_4bssL_ygghYkfxicv_ZhiX2YDDWHf-k,3571
|
|
3
|
-
xinference/_version.py,sha256=
|
|
4
|
-
xinference/conftest.py,sha256=
|
|
5
|
-
xinference/constants.py,sha256=
|
|
3
|
+
xinference/_version.py,sha256=ZJMSF8nqOAMuCeAs35nQ2pCDZSaWMd6E2vS-3qLZTSc,498
|
|
4
|
+
xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
|
|
5
|
+
xinference/constants.py,sha256=VMj62qQ4h36Jt-AmH5g6hmJJteSlKrA3r47K7bGWEPc,3859
|
|
6
6
|
xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
|
|
7
7
|
xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
|
|
8
8
|
xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
|
|
@@ -25,19 +25,19 @@ xinference/core/chat_interface.py,sha256=Kiqs1XOXgYBlP7DOXLEXaFjbVuS0yC1-dXJyxrx
|
|
|
25
25
|
xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
|
|
26
26
|
xinference/core/image_interface.py,sha256=5Iuoiw3g2TvgOYi3gRIAGApve2nNzfMPduRrBHvd1NY,13755
|
|
27
27
|
xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
|
|
28
|
-
xinference/core/model.py,sha256=
|
|
28
|
+
xinference/core/model.py,sha256=7BWvhZmLN2joYCCBWTqiVCMX0moGttz8Fyl15OY9hT8,39587
|
|
29
29
|
xinference/core/progress_tracker.py,sha256=LIF6CLIlnEoSBkuDCraJktDOzZ31mQ4HOo6EVr3KpQM,6453
|
|
30
30
|
xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
|
|
31
31
|
xinference/core/scheduler.py,sha256=gdj3SyP_jelJ86vTRrgnFynhxz5JSwLRsQgx8PTtBi8,15671
|
|
32
32
|
xinference/core/status_guard.py,sha256=4an1KjUOhCStgRQUw1VSzXcycXUtvhxwiMREKKcl1UI,2828
|
|
33
|
-
xinference/core/supervisor.py,sha256=
|
|
34
|
-
xinference/core/utils.py,sha256=
|
|
35
|
-
xinference/core/worker.py,sha256=
|
|
33
|
+
xinference/core/supervisor.py,sha256=Z7cY28M0OeY27-z-OhB9f7BDGs_TVvbSsez1rEJjpdo,52923
|
|
34
|
+
xinference/core/utils.py,sha256=iY9Oog3M-k3OoUJFUfIbcWUQ94Yq0T9iIG_b2iPudP0,8658
|
|
35
|
+
xinference/core/worker.py,sha256=YIlaQosBRj_VStfZGPfWnT2ie13GW8K4NNEP5qz28lI,46402
|
|
36
36
|
xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
|
|
37
|
-
xinference/deploy/cmdline.py,sha256=
|
|
37
|
+
xinference/deploy/cmdline.py,sha256=yQI6KuRUzih0rs_fInp2Lr3rwkOjBOM0eydPaF7VKDQ,48385
|
|
38
38
|
xinference/deploy/local.py,sha256=gcH6WfTxfhjvNkxxKZH3tcGtXV48BEPoaLWYztZHaeo,3954
|
|
39
39
|
xinference/deploy/supervisor.py,sha256=68rB2Ey5KFeF6zto9YGbw3P8QLZmF_KSh1NwH_pNP4w,2986
|
|
40
|
-
xinference/deploy/utils.py,sha256=
|
|
40
|
+
xinference/deploy/utils.py,sha256=jdL7i2WV6u_BZ8IiE1d3YktvCARcB3ntzMQ5rHGD5DM,6756
|
|
41
41
|
xinference/deploy/worker.py,sha256=VQ71ClWpeGsyFgDmcOes2ub1cil10cBjhFLHYeuVwC4,2974
|
|
42
42
|
xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
|
|
43
43
|
xinference/deploy/test/test_cmdline.py,sha256=m8xDzjtDuAJy0QkvYVJIZDuTB29cmYBV0d231JyRCPU,7714
|
|
@@ -51,7 +51,7 @@ xinference/model/audio/cosyvoice.py,sha256=Enur1Y4Xa-mpr7wwnoXWwhyh7PUAjrHZ8DV91
|
|
|
51
51
|
xinference/model/audio/custom.py,sha256=8GXBRmTtR-GY03-E91nlRGTIuabCRzlt20ecU6Un6Y8,4985
|
|
52
52
|
xinference/model/audio/fish_speech.py,sha256=v2WVEV-BLWnbiDvqrx8WTGE_YNKmd9QoAF1LZBXWxn0,7310
|
|
53
53
|
xinference/model/audio/funasr.py,sha256=65z7U7_F14CCP-jg6BpeY3_49FK7Y5OCRSzrhhsklCg,4075
|
|
54
|
-
xinference/model/audio/model_spec.json,sha256=
|
|
54
|
+
xinference/model/audio/model_spec.json,sha256=dHk9t-wBpQ7eso_6_csEO0LwTOoVucq_dAN9PxVjv5M,5120
|
|
55
55
|
xinference/model/audio/model_spec_modelscope.json,sha256=U82E5vZahi4si6kpCjdp2FAG2lCpQ7s7w_1t6lj2ysI,2038
|
|
56
56
|
xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
|
|
57
57
|
xinference/model/audio/whisper.py,sha256=PQL7rebGC7WlIOItuDtjdEtSJtlhxFkolot-Fj-8uDU,7982
|
|
@@ -82,11 +82,11 @@ xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17k
|
|
|
82
82
|
xinference/model/image/stable_diffusion/core.py,sha256=qqMjFcM7KpjQc79irWhTpweIVfenEcsSi6g_WDK7CFM,22982
|
|
83
83
|
xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
|
|
84
84
|
xinference/model/llm/__init__.py,sha256=9g9dFG2XuNDCTLE5vuJ6kCT-rqe9MfN56aEapyXaJ5M,13938
|
|
85
|
-
xinference/model/llm/core.py,sha256=
|
|
86
|
-
xinference/model/llm/llm_family.json,sha256=
|
|
85
|
+
xinference/model/llm/core.py,sha256=g-luuAjZizrPunhyFE9IRjn57l0g6FY_1xUwtlRegbs,8151
|
|
86
|
+
xinference/model/llm/llm_family.json,sha256=9pnfZbFv7XnsiW6vR3g8VpcIhdi4wjSZSCcRLnl5zuc,292604
|
|
87
87
|
xinference/model/llm/llm_family.py,sha256=tI2wPefd7v-PWcVhUO2qy6iGob_ioeNCwAQQzal-2o4,39549
|
|
88
88
|
xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
|
|
89
|
-
xinference/model/llm/llm_family_modelscope.json,sha256=
|
|
89
|
+
xinference/model/llm/llm_family_modelscope.json,sha256=DFKSCauDGx0nHZuyFRBpp4Kau0I5q-Aqf0Lrl_B69u4,221744
|
|
90
90
|
xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
|
|
91
91
|
xinference/model/llm/memory.py,sha256=NEIMw6wWaF9S_bnBYq-EyuDhVbUEEeceQhwE1iwsrhI,10207
|
|
92
92
|
xinference/model/llm/utils.py,sha256=DUC6jPr1-kPNsgc4J5MXNSMVgDlPLfQiitLGfdJxVxM,23596
|
|
@@ -120,7 +120,7 @@ xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjD
|
|
|
120
120
|
xinference/model/llm/transformers/utils.py,sha256=Ej9Tu2yVAotfXMFsl30QlYXLZTODU6Pv_UppsGGUiSw,19185
|
|
121
121
|
xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
|
|
122
122
|
xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
|
|
123
|
-
xinference/model/llm/vllm/core.py,sha256=
|
|
123
|
+
xinference/model/llm/vllm/core.py,sha256=gflboRHy4JvhDG6G2bjPgidgNFTU2dDepbTZBmeDGlY,31516
|
|
124
124
|
xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
|
|
125
125
|
xinference/model/rerank/__init__.py,sha256=wRpf1bOMfmAsuEKEGczMTB5fWEvuqltlJbIbRb-x8Ko,3483
|
|
126
126
|
xinference/model/rerank/core.py,sha256=1ef4Nb7z9z6-7-_Rcjw7VLm2AJvMlmXeIZd2Ap8VSQg,14405
|
|
@@ -15529,9 +15529,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
|
|
|
15529
15529
|
xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
|
|
15530
15530
|
xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
|
|
15531
15531
|
xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
|
|
15532
|
-
xinference-0.16.
|
|
15533
|
-
xinference-0.16.
|
|
15534
|
-
xinference-0.16.
|
|
15535
|
-
xinference-0.16.
|
|
15536
|
-
xinference-0.16.
|
|
15537
|
-
xinference-0.16.
|
|
15532
|
+
xinference-0.16.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
15533
|
+
xinference-0.16.3.dist-info/METADATA,sha256=7X0n1tJuGmaammNKKtZ16nV03LBJb6HcQSFsPkFVKy8,21042
|
|
15534
|
+
xinference-0.16.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
15535
|
+
xinference-0.16.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
|
|
15536
|
+
xinference-0.16.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
|
|
15537
|
+
xinference-0.16.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|