xinference 0.16.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release: this version of xinference might be problematic.

Files changed (60):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +62 -11
  3. xinference/client/restful/restful_client.py +8 -2
  4. xinference/conftest.py +0 -8
  5. xinference/constants.py +2 -0
  6. xinference/core/model.py +44 -5
  7. xinference/core/supervisor.py +13 -7
  8. xinference/core/utils.py +76 -12
  9. xinference/core/worker.py +5 -4
  10. xinference/deploy/cmdline.py +5 -0
  11. xinference/deploy/utils.py +7 -4
  12. xinference/model/audio/model_spec.json +2 -2
  13. xinference/model/image/stable_diffusion/core.py +5 -2
  14. xinference/model/llm/core.py +1 -3
  15. xinference/model/llm/llm_family.json +263 -4
  16. xinference/model/llm/llm_family_modelscope.json +302 -0
  17. xinference/model/llm/mlx/core.py +45 -2
  18. xinference/model/llm/vllm/core.py +2 -1
  19. xinference/model/rerank/core.py +11 -4
  20. xinference/thirdparty/fish_speech/fish_speech/conversation.py +254 -0
  21. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  22. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  23. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  24. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  25. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  26. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +76 -11
  27. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  28. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  29. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +32 -1
  30. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  31. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  32. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  34. xinference/thirdparty/fish_speech/tools/api.py +578 -75
  35. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  36. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  37. xinference/thirdparty/fish_speech/tools/llama/generate.py +393 -9
  38. xinference/thirdparty/fish_speech/tools/msgpack_api.py +90 -29
  39. xinference/thirdparty/fish_speech/tools/post_api.py +37 -15
  40. xinference/thirdparty/fish_speech/tools/schema.py +187 -0
  41. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  42. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  43. xinference/thirdparty/fish_speech/tools/webui.py +138 -75
  44. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/METADATA +26 -3
  45. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/RECORD +49 -56
  46. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/WHEEL +1 -1
  47. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  56. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  57. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  58. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/LICENSE +0 -0
  59. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/entry_points.txt +0 -0
  60. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/top_level.txt +0 -0
--- a/xinference/deploy/cmdline.py
+++ b/xinference/deploy/cmdline.py
@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )

 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     main(
         host=host,
@@ -280,6 +283,7 @@ def supervisor(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     main(
         host=host,
@@ -342,6 +346,7 @@ def worker(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     endpoint = get_endpoint(endpoint)

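All three commands (start_local_cluster, supervisor, worker) now propagate xinference's --log-level to the transformers library through the TRANSFORMERS_VERBOSITY environment variable. A minimal sketch of what that variable does (assuming transformers is installed; its behavior is documented at the URL in the diff comment):

    import os

    # The variable must be set before transformers is imported; processes
    # spawned afterwards inherit it, which is why the diff sets it right
    # after logging is configured.
    os.environ["TRANSFORMERS_VERBOSITY"] = "debug"

    from transformers.utils import logging as hf_logging

    print(hf_logging.get_verbosity())  # 10, i.e. logging.DEBUG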
--- a/xinference/deploy/utils.py
+++ b/xinference/deploy/utils.py
@@ -134,10 +134,6 @@ def get_config_dict(
                 "propagate": False,
             },
         },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
     }
     return config_dict

@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
         pass

     return arg
+
+
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value
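The helper is one line because it leans on process semantics: os.environ mutations made in the parent are copied into the environment of every child spawned afterwards. A self-contained check of that behavior (a demo, not code from the package):

    import os
    import subprocess
    import sys

    # Parent sets the variable...
    os.environ["TRANSFORMERS_VERBOSITY"] = "info"

    # ...and a child started afterwards sees the same value.
    child = subprocess.run(
        [sys.executable, "-c", "import os; print(os.environ['TRANSFORMERS_VERBOSITY'])"],
        capture_output=True,
        text=True,
    )
    print(child.stdout.strip())  # -> info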
--- a/xinference/model/audio/model_spec.json
+++ b/xinference/model/audio/model_spec.json
@@ -127,7 +127,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_id": "2Noise/ChatTTS",
-        "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
+        "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
         "model_ability": "text-to-audio",
         "multilingual": true
     },
@@ -159,7 +159,7 @@
         "model_name": "FishSpeech-1.4",
         "model_family": "FishAudio",
         "model_id": "fishaudio/fish-speech-1.4",
-        "model_revision": "3c49651b8e583b6b13f55e375432e0d57e1aa84d",
+        "model_revision": "069c573759936b35191d3380deb89183c0656f59",
         "model_ability": "text-to-audio",
         "multilingual": true
     }
--- a/xinference/model/image/stable_diffusion/core.py
+++ b/xinference/model/image/stable_diffusion/core.py
@@ -17,9 +17,11 @@ import gc
 import inspect
 import itertools
 import logging
+import os
 import re
 import sys
 import warnings
+from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import PIL.Image
@@ -194,8 +196,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
-            self._kwargs["variant"] = "fp16"
-            self._kwargs["use_safetensors"] = True
+            self._kwargs["use_safetensors"] = any(
+                glob(os.path.join(self._model_path, "*/*.safetensors"))
+            )
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)

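Instead of unconditionally requesting the fp16 variant, the loader now asks for safetensors only when at least one *.safetensors file actually exists one directory level below the model path (diffusers pipelines keep weights in per-component subfolders such as unet/ and vae/). The same check in isolation (the path is a placeholder):

    import os
    from glob import glob

    def has_safetensors(model_path: str) -> bool:
        # True as soon as any per-component subfolder (unet/, vae/,
        # text_encoder/, ...) contains a .safetensors weight file.
        return any(glob(os.path.join(model_path, "*/*.safetensors")))

    print(has_safetensors("/path/to/stable-diffusion"))  # placeholder path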
--- a/xinference/model/llm/core.py
+++ b/xinference/model/llm/core.py
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
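parse_replica_model_uid now returns a two-tuple; the replica count is no longer carried in the uid. The helper's body is not part of this diff, so the following is only a hypothetical sketch of the new contract (the "<model_uid>-<rep_id>" format is an assumption):

    # Hypothetical sketch, NOT the package's implementation: assumes the
    # replica uid has the form "<model_uid>-<rep_id>".
    def parse_replica_model_uid(replica_model_uid: str):
        model_uid, _, rep_id = replica_model_uid.rpartition("-")
        return model_uid, int(rep_id)

    model_uid, rep_id = parse_replica_model_uid("my-model-0")
    print(model_uid, rep_id)  # -> my-model 0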
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -1312,6 +1312,93 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,
@@ -8118,6 +8205,16 @@
         ],
         "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
         "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
@@ -8126,8 +8223,17 @@
                     "8-bit",
                     "none"
                 ],
-                "model_id": "Qwen/Qwen2.5-Coder-1.5B",
-                "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B"
             },
             {
                 "model_format": "pytorch",
@@ -8137,8 +8243,27 @@
                     "8-bit",
                     "none"
                 ],
-                "model_id": "Qwen/Qwen2.5-Coder-7B",
-                "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
+                "model_id": "Qwen/Qwen2.5-Coder-7B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B"
             }
         ]
     },
@@ -8156,6 +8281,16 @@
         ],
         "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
         "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
@@ -8166,6 +8301,16 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
             },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
@@ -8176,6 +8321,53 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
             },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+            },
             {
                 "model_format": "gptq",
                 "model_size_in_billions": "7",
@@ -8185,6 +8377,73 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
             },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "14",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "32",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "7",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "14",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "32",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+            },
+
             {
                 "model_format": "ggufv2",
                 "model_size_in_billions": "1_5"