xinference 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-11-01T17:56:47+0800",
+ "date": "2024-11-07T16:55:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "67e97ab485b539dc7a208825bee0504acc37044e",
- "version": "0.16.2"
+ "full-revisionid": "85ab86bf1c0967e45fbec995534cd5a0c9a9c439",
+ "version": "0.16.3"
 }
 ''' # END VERSION_JSON
 
xinference/conftest.py CHANGED
@@ -58,10 +58,6 @@ TEST_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler"],
-    },
 }
 
 TEST_LOG_FILE_PATH = get_log_file(f"test_{get_timestamp_ms()}")
@@ -102,10 +98,6 @@ TEST_FILE_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler", "file_handler"],
-    },
 }
 
 
xinference/constants.py CHANGED
@@ -87,3 +87,4 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_LAUNCH_MODEL_RETRY = 3
xinference/core/model.py CHANGED
@@ -40,7 +40,10 @@ from typing import (
 import sse_starlette.sse
 import xoscar as xo
 
-from ..constants import XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE
+from ..constants import (
+    XINFERENCE_LAUNCH_MODEL_RETRY,
+    XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
+)
 
 if TYPE_CHECKING:
     from .progress_tracker import ProgressTrackerActor
@@ -134,6 +137,8 @@ def oom_check(fn):
 
 
 class ModelActor(xo.StatelessActor):
+    _replica_model_uid: Optional[str]
+
     @classmethod
     def gen_uid(cls, model: "LLM"):
         return f"{model.__class__}-model-actor"
@@ -192,6 +197,7 @@ class ModelActor(xo.StatelessActor):
         supervisor_address: str,
         worker_address: str,
         model: "LLM",
+        replica_model_uid: str,
         model_description: Optional["ModelDescription"] = None,
         request_limits: Optional[int] = None,
     ):
@@ -203,6 +209,7 @@ class ModelActor(xo.StatelessActor):
 
         self._supervisor_address = supervisor_address
         self._worker_address = worker_address
+        self._replica_model_uid = replica_model_uid
         self._model = model
         self._model_description = (
             model_description.to_dict() if model_description else {}
@@ -257,6 +264,9 @@ class ModelActor(xo.StatelessActor):
                 uid=FluxBatchSchedulerActor.gen_uid(self.model_uid()),
             )
 
+    def __repr__(self) -> str:
+        return f"ModelActor({self._replica_model_uid})"
+
     async def _record_completion_metrics(
         self, duration, completion_tokens, prompt_tokens
     ):
@@ -374,7 +384,28 @@ class ModelActor(xo.StatelessActor):
         return condition
 
     async def load(self):
-        self._model.load()
+        try:
+            # Change process title for model
+            import setproctitle
+
+            setproctitle.setproctitle(f"Model: {self._replica_model_uid}")
+        except ImportError:
+            pass
+        i = 0
+        while True:
+            i += 1
+            try:
+                self._model.load()
+                break
+            except Exception as e:
+                if (
+                    i < XINFERENCE_LAUNCH_MODEL_RETRY
+                    and str(e).find("busy or unavailable") >= 0
+                ):
+                    await asyncio.sleep(5)
+                    logger.warning("Retry to load model {model_uid}: %d times", i)
+                    continue
+                raise
         if self.allow_batching():
             await self._scheduler_ref.set_model(self._model)
         logger.debug(
@@ -385,6 +416,7 @@ class ModelActor(xo.StatelessActor):
             logger.debug(
                 f"Batching enabled for model: {self.model_uid()}, max_num_images: {self._model.get_max_num_images_for_batching()}"
             )
+        logger.info(f"{self} loaded")
 
     def model_uid(self):
         return (
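Taken together with the new XINFERENCE_LAUNCH_MODEL_RETRY = 3 constant from constants.py, the reworked load() retries transient failures before giving up. A standalone sketch of the retry pattern (the load_once callable is hypothetical; the real code calls self._model.load()):

import asyncio

XINFERENCE_LAUNCH_MODEL_RETRY = 3  # mirrors the new constant in xinference/constants.py

async def load_with_retry(load_once) -> None:
    # Retry only "busy or unavailable" errors, up to the configured limit.
    attempt = 0
    while True:
        attempt += 1
        try:
            load_once()  # stands in for self._model.load()
            return
        except Exception as e:
            if attempt < XINFERENCE_LAUNCH_MODEL_RETRY and "busy or unavailable" in str(e):
                await asyncio.sleep(5)  # fixed 5-second backoff between attempts
                continue
            raise  # other errors, or exhausted retries, still propagate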
xinference/core/supervisor.py CHANGED
@@ -970,7 +970,7 @@ class SupervisorActor(xo.StatelessActor):
                 raise ValueError(
                     f"Model is already in the model list, uid: {_replica_model_uid}"
                 )
-            replica_gpu_idx = assign_replica_gpu(_replica_model_uid, gpu_idx)
+            replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
             nonlocal model_type
 
             worker_ref = (
@@ -1084,7 +1084,7 @@ class SupervisorActor(xo.StatelessActor):
                 dead_models,
             )
             for replica_model_uid in dead_models:
-                model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+                model_uid, _ = parse_replica_model_uid(replica_model_uid)
                 self._model_uid_to_replica_info.pop(model_uid, None)
                 self._replica_model_uid_to_worker.pop(
                     replica_model_uid, None
@@ -1137,7 +1137,7 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
 
         replica_model_uid = build_replica_model_uid(
-            model_uid, replica_info.replica, next(replica_info.scheduler)
+            model_uid, next(replica_info.scheduler)
         )
 
         worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
@@ -1154,7 +1154,7 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
         # Use rep id 0 to instead of next(replica_info.scheduler) to avoid
         # consuming the generator.
-        replica_model_uid = build_replica_model_uid(model_uid, replica_info.replica, 0)
+        replica_model_uid = build_replica_model_uid(model_uid, 0)
         worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
         if worker_ref is None:
             raise ValueError(
@@ -1260,7 +1260,7 @@ class SupervisorActor(xo.StatelessActor):
                 uids_to_remove.append(model_uid)
 
         for replica_model_uid in uids_to_remove:
-            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+            model_uid, _ = parse_replica_model_uid(replica_model_uid)
             self._model_uid_to_replica_info.pop(model_uid, None)
             self._replica_model_uid_to_worker.pop(replica_model_uid, None)
 
xinference/core/utils.py CHANGED
@@ -146,27 +146,26 @@ def iter_replica_model_uid(model_uid: str, replica: int) -> Generator[str, None,
     """
     replica = int(replica)
     for rep_id in range(replica):
-        yield f"{model_uid}-{replica}-{rep_id}"
+        yield f"{model_uid}-{rep_id}"
 
 
-def build_replica_model_uid(model_uid: str, replica: int, rep_id: int) -> str:
+def build_replica_model_uid(model_uid: str, rep_id: int) -> str:
     """
     Build a replica model uid.
     """
-    return f"{model_uid}-{replica}-{rep_id}"
+    return f"{model_uid}-{rep_id}"
 
 
-def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int, int]:
+def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int]:
     """
-    Parse replica model uid to model uid, replica and rep id.
+    Parse replica model uid to model uid and rep id.
     """
     parts = replica_model_uid.split("-")
     if len(parts) == 1:
-        return replica_model_uid, -1, -1
+        return replica_model_uid, -1
     rep_id = int(parts.pop())
-    replica = int(parts.pop())
     model_uid = "-".join(parts)
-    return model_uid, replica, rep_id
+    return model_uid, rep_id
 
 
 def is_valid_model_uid(model_uid: str) -> bool:
@@ -261,9 +260,9 @@ def get_nvidia_gpu_info() -> Dict:
 
 
 def assign_replica_gpu(
-    _replica_model_uid: str, gpu_idx: Union[int, List[int]]
+    _replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
 ) -> List[int]:
-    model_uid, replica, rep_id = parse_replica_model_uid(_replica_model_uid)
+    model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
     rep_id, replica = int(rep_id), int(replica)
     if isinstance(gpu_idx, int):
         gpu_idx = [gpu_idx]
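The practical consequence of this change: replica uids shrink from "{model_uid}-{replica}-{rep_id}" to "{model_uid}-{rep_id}", so the replica count must now be passed to assign_replica_gpu explicitly (which the supervisor.py change above does). A round-trip check of the new helpers, copied from the changed functions and runnable on its own:

from typing import Tuple

def build_replica_model_uid(model_uid: str, rep_id: int) -> str:
    # New two-part format: the replica count is no longer embedded in the uid.
    return f"{model_uid}-{rep_id}"

def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int]:
    parts = replica_model_uid.split("-")
    if len(parts) == 1:
        return replica_model_uid, -1  # plain model uid, no rep id suffix
    rep_id = int(parts.pop())
    return "-".join(parts), rep_id

# Hyphens inside the model uid itself survive the round trip.
assert build_replica_model_uid("qwen2-instruct", 1) == "qwen2-instruct-1"
assert parse_replica_model_uid("qwen2-instruct-1") == ("qwen2-instruct", 1)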
xinference/core/worker.py CHANGED
@@ -157,7 +157,7 @@ class WorkerActor(xo.StatelessActor):
                 model_uid,
                 recover_count - 1,
             )
-            event_model_uid, _, __ = parse_replica_model_uid(model_uid)
+            event_model_uid, _ = parse_replica_model_uid(model_uid)
             try:
                 if self._event_collector_ref is not None:
                     await self._event_collector_ref.report_event(
@@ -377,7 +377,7 @@ class WorkerActor(xo.StatelessActor):
         return len(self._model_uid_to_model)
 
     async def is_model_vllm_backend(self, model_uid: str) -> bool:
-        _model_uid, _, _ = parse_replica_model_uid(model_uid)
+        _model_uid, _ = parse_replica_model_uid(model_uid)
         supervisor_ref = await self.get_supervisor_ref()
         model_ref = await supervisor_ref.get_model(_model_uid)
         return await model_ref.is_vllm_backend()
@@ -800,7 +800,7 @@ class WorkerActor(xo.StatelessActor):
         launch_args.update(kwargs)
 
         try:
-            origin_uid, _, _ = parse_replica_model_uid(model_uid)
+            origin_uid, _ = parse_replica_model_uid(model_uid)
         except Exception as e:
             logger.exception(e)
             raise
@@ -889,6 +889,7 @@ class WorkerActor(xo.StatelessActor):
                 uid=model_uid,
                 supervisor_address=self._supervisor_address,
                 worker_address=self.address,
+                replica_model_uid=model_uid,
                 model=model,
                 model_description=model_description,
                 request_limits=request_limits,
@@ -926,7 +927,7 @@ class WorkerActor(xo.StatelessActor):
         # Terminate model while its launching is not allow
         if model_uid in self._model_uid_launching_guard:
             raise ValueError(f"{model_uid} is launching")
-        origin_uid, _, __ = parse_replica_model_uid(model_uid)
+        origin_uid, _ = parse_replica_model_uid(model_uid)
         try:
             _ = await self.get_supervisor_ref()
             if self._event_collector_ref is not None:
xinference/deploy/cmdline.py CHANGED
@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )
 
 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     main(
         host=host,
@@ -280,6 +283,7 @@ def supervisor(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     main(
         host=host,
@@ -342,6 +346,7 @@ def worker(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
     endpoint = get_endpoint(endpoint)
 
xinference/deploy/utils.py CHANGED
@@ -134,10 +134,6 @@ def get_config_dict(
                 "propagate": False,
             },
         },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
     }
     return config_dict
 
@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
             pass
 
     return arg
+
+
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value
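The docstring captures why this helper is enough for the TRANSFORMERS_VERBOSITY wiring in cmdline.py: a value written to os.environ before worker processes are spawned is inherited by them. A quick standalone demonstration (not xinference code):

import os
import subprocess
import sys

os.environ["TRANSFORMERS_VERBOSITY"] = "warning"  # what set_envs() does

# Spawned child processes inherit the parent's environment by default.
child = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['TRANSFORMERS_VERBOSITY'])"],
    capture_output=True,
    text=True,
)
assert child.stdout.strip() == "warning"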
xinference/model/audio/model_spec.json CHANGED
@@ -127,7 +127,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_id": "2Noise/ChatTTS",
-        "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
+        "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
         "model_ability": "text-to-audio",
         "multilingual": true
     },
xinference/model/llm/core.py CHANGED
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
xinference/model/llm/llm_family.json CHANGED
@@ -1312,6 +1312,93 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -363,6 +363,97 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,
xinference/model/llm/vllm/core.py CHANGED
@@ -163,7 +163,6 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
 
-
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
@@ -177,6 +176,8 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.6.3":
+    VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
 
 
xinference-0.16.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.16.2
+Version: 0.16.3
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -42,6 +42,7 @@ Requires-Dist: nvidia-ml-py
 Requires-Dist: async-timeout
 Requires-Dist: peft
 Requires-Dist: timm
+Requires-Dist: setproctitle
 Provides-Extra: all
 Requires-Dist: llama-cpp-python!=0.2.58,>=0.2.25; extra == "all"
 Requires-Dist: transformers>=4.43.2; extra == "all"
@@ -71,7 +72,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "all"
 Requires-Dist: librosa; extra == "all"
 Requires-Dist: xxhash; extra == "all"
 Requires-Dist: torchaudio; extra == "all"
-Requires-Dist: ChatTTS>=0.2; extra == "all"
+Requires-Dist: ChatTTS>=0.2.1; extra == "all"
 Requires-Dist: lightning>=2.0.0; extra == "all"
 Requires-Dist: hydra-core>=1.3.2; extra == "all"
 Requires-Dist: inflect; extra == "all"
@@ -105,7 +106,7 @@ Requires-Dist: WeTextProcessing<1.0.4; extra == "audio"
 Requires-Dist: librosa; extra == "audio"
 Requires-Dist: xxhash; extra == "audio"
 Requires-Dist: torchaudio; extra == "audio"
-Requires-Dist: ChatTTS>=0.2; extra == "audio"
+Requires-Dist: ChatTTS>=0.2.1; extra == "audio"
 Requires-Dist: tiktoken; extra == "audio"
 Requires-Dist: torch>=2.0.0; extra == "audio"
 Requires-Dist: lightning>=2.0.0; extra == "audio"
xinference-0.16.3.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
 xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
 xinference/_compat.py,sha256=xFztCfyrq3O_4bssL_ygghYkfxicv_ZhiX2YDDWHf-k,3571
-xinference/_version.py,sha256=LLdtJFZnTO6_OxxmoVVh6cxLwAakRJfTVLW8tmmb1Fs,498
-xinference/conftest.py,sha256=56HYQjsAJcQrpZSmskniPqH9dLoW-i3Oud6NVTtc4io,9752
-xinference/constants.py,sha256=l_aIN20C_NwitSEHFvrIqFvcW8Kg9SPX6NFEaPBu0VQ,3825
+xinference/_version.py,sha256=ZJMSF8nqOAMuCeAs35nQ2pCDZSaWMd6E2vS-3qLZTSc,498
+xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
+xinference/constants.py,sha256=VMj62qQ4h36Jt-AmH5g6hmJJteSlKrA3r47K7bGWEPc,3859
 xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
 xinference/fields.py,sha256=0UtBFaDNzn1n9MRjyTkNrolsIML-TpZfudWOejqjni8,5245
 xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
@@ -25,19 +25,19 @@ xinference/core/chat_interface.py,sha256=Kiqs1XOXgYBlP7DOXLEXaFjbVuS0yC1-dXJyxrx
 xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
 xinference/core/image_interface.py,sha256=5Iuoiw3g2TvgOYi3gRIAGApve2nNzfMPduRrBHvd1NY,13755
 xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
-xinference/core/model.py,sha256=GEOqKDllyZBAPOUk7ujt-c88AfCgE7-bgYplzl3XOCk,38613
+xinference/core/model.py,sha256=7BWvhZmLN2joYCCBWTqiVCMX0moGttz8Fyl15OY9hT8,39587
 xinference/core/progress_tracker.py,sha256=LIF6CLIlnEoSBkuDCraJktDOzZ31mQ4HOo6EVr3KpQM,6453
 xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
 xinference/core/scheduler.py,sha256=gdj3SyP_jelJ86vTRrgnFynhxz5JSwLRsQgx8PTtBi8,15671
 xinference/core/status_guard.py,sha256=4an1KjUOhCStgRQUw1VSzXcycXUtvhxwiMREKKcl1UI,2828
-xinference/core/supervisor.py,sha256=Wkjhk1tfRuhyQmcVNrHZApWO09MDA5-Uu4u2p1GBj3I,52964
-xinference/core/utils.py,sha256=pFggqUjfsB9ME6V0VqsppN7KAHNrqpxMuJsIUPNkwoM,8745
-xinference/core/worker.py,sha256=MmGZuPZlI-DrC3VahkSZjGhpw9S9ISVGsxWhBlKNQMk,46367
+xinference/core/supervisor.py,sha256=Z7cY28M0OeY27-z-OhB9f7BDGs_TVvbSsez1rEJjpdo,52923
+xinference/core/utils.py,sha256=iY9Oog3M-k3OoUJFUfIbcWUQ94Yq0T9iIG_b2iPudP0,8658
+xinference/core/worker.py,sha256=YIlaQosBRj_VStfZGPfWnT2ie13GW8K4NNEP5qz28lI,46402
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/deploy/cmdline.py,sha256=YNXbPIT9zJIp5EQzl_rH5KwDDYqBd2CbaOVF8hA0lws,48120
+xinference/deploy/cmdline.py,sha256=yQI6KuRUzih0rs_fInp2Lr3rwkOjBOM0eydPaF7VKDQ,48385
 xinference/deploy/local.py,sha256=gcH6WfTxfhjvNkxxKZH3tcGtXV48BEPoaLWYztZHaeo,3954
 xinference/deploy/supervisor.py,sha256=68rB2Ey5KFeF6zto9YGbw3P8QLZmF_KSh1NwH_pNP4w,2986
-xinference/deploy/utils.py,sha256=71xnPSjjF3XDZIYmlJ59Fbr7mswWERtNdjfdYGwyT_I,6703
+xinference/deploy/utils.py,sha256=jdL7i2WV6u_BZ8IiE1d3YktvCARcB3ntzMQ5rHGD5DM,6756
 xinference/deploy/worker.py,sha256=VQ71ClWpeGsyFgDmcOes2ub1cil10cBjhFLHYeuVwC4,2974
 xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/test/test_cmdline.py,sha256=m8xDzjtDuAJy0QkvYVJIZDuTB29cmYBV0d231JyRCPU,7714
@@ -51,7 +51,7 @@ xinference/model/audio/cosyvoice.py,sha256=Enur1Y4Xa-mpr7wwnoXWwhyh7PUAjrHZ8DV91
 xinference/model/audio/custom.py,sha256=8GXBRmTtR-GY03-E91nlRGTIuabCRzlt20ecU6Un6Y8,4985
 xinference/model/audio/fish_speech.py,sha256=v2WVEV-BLWnbiDvqrx8WTGE_YNKmd9QoAF1LZBXWxn0,7310
 xinference/model/audio/funasr.py,sha256=65z7U7_F14CCP-jg6BpeY3_49FK7Y5OCRSzrhhsklCg,4075
-xinference/model/audio/model_spec.json,sha256=JLgT4fKZuD5jz5cBO_KIFkSm_6a6UEW6z0YVrfQJJkI,5120
+xinference/model/audio/model_spec.json,sha256=dHk9t-wBpQ7eso_6_csEO0LwTOoVucq_dAN9PxVjv5M,5120
 xinference/model/audio/model_spec_modelscope.json,sha256=U82E5vZahi4si6kpCjdp2FAG2lCpQ7s7w_1t6lj2ysI,2038
 xinference/model/audio/utils.py,sha256=pwo5cHh8nvhyBa9f-17QaVpXMSjmbpGbPYKwBBtEhGM,717
 xinference/model/audio/whisper.py,sha256=PQL7rebGC7WlIOItuDtjdEtSJtlhxFkolot-Fj-8uDU,7982
@@ -82,11 +82,11 @@ xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17k
 xinference/model/image/stable_diffusion/core.py,sha256=qqMjFcM7KpjQc79irWhTpweIVfenEcsSi6g_WDK7CFM,22982
 xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
 xinference/model/llm/__init__.py,sha256=9g9dFG2XuNDCTLE5vuJ6kCT-rqe9MfN56aEapyXaJ5M,13938
-xinference/model/llm/core.py,sha256=fBKIi3zJ-37v7o1ON8_YyDF-44uJ34jYgUktVJOeQO0,8187
-xinference/model/llm/llm_family.json,sha256=BiJwRSTFjKUErru4Mqek-P6JAlbFA1eGT9xzabfwBRc,290116
+xinference/model/llm/core.py,sha256=g-luuAjZizrPunhyFE9IRjn57l0g6FY_1xUwtlRegbs,8151
+xinference/model/llm/llm_family.json,sha256=9pnfZbFv7XnsiW6vR3g8VpcIhdi4wjSZSCcRLnl5zuc,292604
 xinference/model/llm/llm_family.py,sha256=tI2wPefd7v-PWcVhUO2qy6iGob_ioeNCwAQQzal-2o4,39549
 xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
-xinference/model/llm/llm_family_modelscope.json,sha256=5sOuthTDH9NisEw_3V22WdQgA_lQY5fl9vv-XYYwfVY,219124
+xinference/model/llm/llm_family_modelscope.json,sha256=DFKSCauDGx0nHZuyFRBpp4Kau0I5q-Aqf0Lrl_B69u4,221744
 xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
 xinference/model/llm/memory.py,sha256=NEIMw6wWaF9S_bnBYq-EyuDhVbUEEeceQhwE1iwsrhI,10207
 xinference/model/llm/utils.py,sha256=DUC6jPr1-kPNsgc4J5MXNSMVgDlPLfQiitLGfdJxVxM,23596
@@ -120,7 +120,7 @@ xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjD
 xinference/model/llm/transformers/utils.py,sha256=Ej9Tu2yVAotfXMFsl30QlYXLZTODU6Pv_UppsGGUiSw,19185
 xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=5_aClQ1m37KpT6pyvSm8Kt0744mMqYrCkLC43BLHhI8,31391
+xinference/model/llm/vllm/core.py,sha256=gflboRHy4JvhDG6G2bjPgidgNFTU2dDepbTZBmeDGlY,31516
 xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
 xinference/model/rerank/__init__.py,sha256=wRpf1bOMfmAsuEKEGczMTB5fWEvuqltlJbIbRb-x8Ko,3483
 xinference/model/rerank/core.py,sha256=1ef4Nb7z9z6-7-_Rcjw7VLm2AJvMlmXeIZd2Ap8VSQg,14405
@@ -15529,9 +15529,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.16.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-0.16.2.dist-info/METADATA,sha256=JD_uweW_grkKlqWWMtszZpXILC_LoQM7WLh1Y1RjvyE,21010
-xinference-0.16.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-xinference-0.16.2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-0.16.2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-0.16.2.dist-info/RECORD,,
+xinference-0.16.3.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.16.3.dist-info/METADATA,sha256=7X0n1tJuGmaammNKKtZ16nV03LBJb6HcQSFsPkFVKy8,21042
+xinference-0.16.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+xinference-0.16.3.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.16.3.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.16.3.dist-info/RECORD,,