xinference 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +462 -3
- xinference/client/restful/async_restful_client.py +158 -5
- xinference/client/restful/restful_client.py +131 -0
- xinference/core/supervisor.py +12 -0
- xinference/model/audio/model_spec.json +20 -20
- xinference/model/image/model_spec.json +159 -159
- xinference/model/llm/__init__.py +2 -2
- xinference/model/llm/llm_family.json +843 -180
- xinference/model/llm/mlx/distributed_models/core.py +41 -0
- xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
- xinference/model/llm/sglang/core.py +20 -6
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
- xinference/model/llm/transformers/chatglm.py +3 -0
- xinference/model/llm/transformers/core.py +129 -36
- xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
- xinference/model/llm/transformers/utils.py +23 -0
- xinference/model/llm/utils.py +37 -24
- xinference/model/llm/vllm/core.py +128 -69
- xinference/model/utils.py +74 -31
- xinference/thirdparty/audiotools/core/audio_signal.py +6 -6
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
- xinference/thirdparty/melo/text/chinese_mix.py +2 -2
- xinference/types.py +9 -0
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/{main.d192c4f3.js → main.45e78536.js} +3 -3
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/METADATA +7 -5
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/RECORD +36 -35
- xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.d192c4f3.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -0
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py
CHANGED
@@ -131,7 +131,7 @@ except ImportError:
     VLLM_INSTALLED = False
     VLLM_VERSION = None
 
-
+VLLM_SUPPORTED_MULTI_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
@@ -229,34 +229,37 @@ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.5.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.1"):
-
-
-
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("internvl2")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5-MPO")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL3")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.3"):
     VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
-
-
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("llama-3.2-vision-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.2"):
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-vl-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwenLong-l1")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-omni")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("gemma-3-it")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
@@ -272,7 +275,7 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("Ernie4.5")
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.1v-thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
@@ -280,7 +283,7 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")
-
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.5v")
     VLLM_SUPPORTED_CHAT_MODELS.append("KAT-V1")
 
 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
@@ -291,9 +294,11 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
 
-if VLLM_INSTALLED and VLLM_VERSION
-
-
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Instruct")
 
 
 class VLLMModel(LLM):
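The hunks above extend the pattern this module already uses: model names are appended to capability lists only behind `version.parse` gates, so an older vLLM simply advertises fewer models instead of failing at import time. A minimal sketch of that version-gated registration pattern (the list name, model name and threshold below are illustrative, not taken from this diff):

    # Sketch only: version-gated capability registration with packaging.version.
    # MY_MULTIMODAL_MODELS, the model name and the "0.11.0" threshold are illustrative.
    from packaging import version

    try:
        import vllm
        VLLM_INSTALLED = True
        VLLM_VERSION = version.parse(vllm.__version__)
    except ImportError:
        VLLM_INSTALLED = False
        VLLM_VERSION = None

    MY_MULTIMODAL_MODELS: list = []

    if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
        # Only advertise what this engine version can actually serve.
        MY_MULTIMODAL_MODELS.append("some-new-multimodal-model")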
@@ -545,7 +550,7 @@ class VLLMModel(LLM):
             # patch vllm Executor.get_class
             Executor.get_class = lambda vllm_config: executor_cls
             self._engine = AsyncLLMEngine.from_engine_args(engine_args)
-        except:
+        except: # noqa: E722
             logger.exception("Creating vllm engine failed")
             self._loading_error = sys.exc_info()
 
@@ -714,7 +719,7 @@ class VLLMModel(LLM):
         logger.info("Detecting vLLM is not health, prepare to quit the process")
         try:
             self.stop()
-        except:
+        except: # noqa: E722
             # ignore error when stop
             pass
         # Just kill the process and let xinference auto-recover the model
@@ -857,7 +862,7 @@ class VLLMModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
             # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
@@ -988,7 +993,10 @@ class VLLMModel(LLM):
         from vllm import TokensPrompt
 
         token_ids = await asyncio.to_thread(
-            self._tokenize,
+            self._tokenize,
+            tokenizer,
+            prompt, # type: ignore
+            config,
         )
         return TokensPrompt(prompt_token_ids=token_ids)
 
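The `_gen_tokens_prompt` fix above passes the tokenizer, prompt and config through `asyncio.to_thread` instead of calling the bound method with no arguments; `asyncio.to_thread(func, *args)` forwards the extra positional arguments to `func` inside the worker thread. A small self-contained sketch of that calling convention (the helper names are made up for illustration):

    # Sketch only: asyncio.to_thread forwards extra positional/keyword arguments to the
    # callable, so a blocking helper receives its inputs explicitly.
    import asyncio

    def tokenize(tokenizer, text):
        # stand-in for a slow, blocking tokenizer call
        return [ord(ch) for ch in text]

    async def tokenize_in_thread(tokenizer, text):
        # equivalent to running tokenize(tokenizer, text) in a worker thread
        return await asyncio.to_thread(tokenize, tokenizer, text)

    print(asyncio.run(tokenize_in_thread(None, "hi")))  # [104, 105]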
@@ -1082,18 +1090,43 @@ class VLLMModel(LLM):
                 logger.warning(f"Failed to create GuidedDecodingParams: {e}")
                 guided_options = None
 
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                import inspect
+
+                sp_sig = inspect.signature(SamplingParams)
+                # For v0.9.2 and similar versions, prioritize guided_decoding over structured_outputs
+                # structured_outputs was introduced later (around v0.11.0) and may not accept
+                # GuidedDecodingParams in earlier versions even if the parameter exists
+                if "guided_decoding" in sp_sig.parameters:
+                    sampling_params = SamplingParams(
+                        guided_decoding=guided_options, **sanitized_generate_config
+                    )
+                elif "structured_outputs" in sp_sig.parameters:
+                    try:
+                        sampling_params = SamplingParams(
+                            structured_outputs=guided_options,
+                            **sanitized_generate_config,
+                        )
+                    except TypeError as e:
+                        if "structured_outputs" in str(e):
+                            # structured_outputs parameter exists but doesn't accept GuidedDecodingParams
+                            # Fall back to no guided decoding
+                            logger.warning(
+                                f"structured_outputs parameter failed: {e}. "
+                                "Falling back to no guided decoding for vLLM version compatibility."
+                            )
+                            sampling_params = SamplingParams(
+                                **sanitized_generate_config
+                            )
+                        else:
+                            raise
+                else:
+                    sampling_params = SamplingParams(**sanitized_generate_config)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to create SamplingParams with guided decoding: {e}"
                 )
+                sampling_params = SamplingParams(**sanitized_generate_config)
         else:
             # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
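The new block probes `SamplingParams` with `inspect.signature` and only passes `guided_decoding` or `structured_outputs` when the installed vLLM actually accepts that keyword, falling back to plain sampling otherwise. A generic sketch of the same feature-detection idea (the class and field names below are stand-ins, not vLLM's real API surface):

    # Sketch only: choose the keyword a constructor actually accepts instead of
    # hard-coding one engine version. ParamsV2 is a stand-in for vllm.SamplingParams.
    import inspect

    class ParamsV2:
        def __init__(self, structured_outputs=None, temperature=1.0):
            self.structured_outputs = structured_outputs
            self.temperature = temperature

    def build_params(cls, guided, **kwargs):
        sig = inspect.signature(cls)
        if "guided_decoding" in sig.parameters:
            return cls(guided_decoding=guided, **kwargs)
        if "structured_outputs" in sig.parameters:
            return cls(structured_outputs=guided, **kwargs)
        return cls(**kwargs)  # neither keyword exists: drop guided decoding

    params = build_params(ParamsV2, guided={"json_schema": {}}, temperature=0.2)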
@@ -1111,7 +1144,9 @@ class VLLMModel(LLM):
             # this requires tokenizing
             tokenizer = await self._get_tokenizer(lora_request)
             prompt_or_token_ids = await self._gen_tokens_prompt(
-                tokenizer,
+                tokenizer,
+                prompt,
+                sanitized_generate_config, # type: ignore
             )
             sampling_params.max_tokens = max_tokens = self._context_length - len( # type: ignore
                 prompt_or_token_ids["prompt_token_ids"] # type: ignore
@@ -1266,11 +1301,10 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         ]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1430,7 +1464,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         return self._to_chat_completion(c, self.reasoning_parser)
 
 
-class
+class VLLMMultiModel(VLLMModel, ChatModelMixin):
     @classmethod
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
@@ -1442,11 +1476,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1456,12 +1489,16 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if "4" not in quantization:
                 return False
         if isinstance(llm_family, CustomLLMFamilyV2):
-            if llm_family.model_family not in
+            if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
         else:
-            if llm_family.model_name not in
+            if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
-        if
+        if (
+            "vision" not in llm_family.model_ability
+            and "audio" not in llm_family.model_ability
+            and "omni" not in llm_family.model_ability
+        ):
             return False
         return VLLM_INSTALLED
 
@@ -1470,13 +1507,21 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
     ) -> VLLMModelConfig:
         model_config = super()._sanitize_model_config(model_config)
         if VLLM_VERSION >= version.parse("0.5.5"):
-            model_config
-
-
-
-
-
-
+            if model_config.get("limit_mm_per_prompt"):
+                model_config["limit_mm_per_prompt"] = json.loads(
+                    model_config.get("limit_mm_per_prompt") # type: ignore
+                )
+            else:
+                if "omni" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {
+                        "image": 2,
+                        "video": 2,
+                        "audio": 2,
+                    }
+                elif "vision" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
+                elif "audio" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"audio": 2}
         return model_config
 
     def _sanitize_chat_config(
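`_sanitize_model_config` now accepts `limit_mm_per_prompt` as a JSON string and otherwise derives per-modality defaults from the model's abilities. A hedged sketch of that resolution logic as a standalone helper (names simplified; default values copied from the hunk above):

    # Sketch only: limit_mm_per_prompt may arrive as a JSON string from the user;
    # otherwise defaults are derived from the model's abilities.
    import json

    def resolve_mm_limits(raw, abilities):
        if raw:
            return json.loads(raw)  # e.g. '{"image": 4, "video": 1}'
        if "omni" in abilities:
            return {"image": 2, "video": 2, "audio": 2}
        if "vision" in abilities:
            return {"image": 2, "video": 2}
        if "audio" in abilities:
            return {"audio": 2}
        return {}

    print(resolve_mm_limits(None, ["vision", "chat"]))    # {'image': 2, 'video': 2}
    print(resolve_mm_limits('{"image": 4}', ["vision"]))  # {'image': 4}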
@@ -1510,7 +1555,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         multi_modal_data = prompt.get("multi_modal_data")
 
         token_ids = await asyncio.to_thread(
-            self._tokenize,
+            self._tokenize,
+            tokenizer,
+            prompt_str,
+            config, # type: ignore
         )
         return TokensPrompt(
             prompt_token_ids=token_ids, multi_modal_data=multi_modal_data
@@ -1526,9 +1574,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
 
         model_family = self.model_family.model_family or self.model_family.model_name
-
+        audios, images, videos = None, None, None
         if "internvl" not in model_family.lower():
-            from
+            from qwen_omni_utils import (
+                process_audio_info,
+                process_mm_info,
+                process_vision_info,
+            )
 
             messages = self._transform_messages(messages)
 
@@ -1543,29 +1595,36 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if tools and model_family in QWEN_TOOL_CALL_FAMILY:
                 full_context_kwargs["tools"] = tools
             assert self.model_family.chat_template is not None
+            if "omni" in self.model_family.model_ability:
+                audios, images, videos = process_mm_info(
+                    messages, use_audio_in_video=True
+                )
+            elif "audio" in self.model_family.model_ability:
+                audios = process_audio_info(messages, use_audio_in_video=False)
+            elif "vision" in self.model_family.model_ability:
+                images, videos = process_vision_info( # type: ignore
+                    messages, return_video_kwargs=False
+                )
+
             prompt = self.get_full_context(
                 messages, self.model_family.chat_template, **full_context_kwargs
             )
-            images, video_inputs = process_vision_info(messages)
-            if video_inputs:
-                raise ValueError("Not support video input now.")
-        else:
-            prompt, images = self.get_specific_prompt(model_family, messages)
 
-        if not images:
-            inputs = {
-                "prompt": prompt,
-            }
-        elif len(images) == 1:
-            inputs = {
-                "prompt": prompt,
-                "multi_modal_data": {"image": images[-1]}, # type: ignore
-            }
         else:
-
-
-
-
+            prompt, images = self.get_specific_prompt(model_family, messages)
+        inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
+        if images:
+            inputs["multi_modal_data"]["image"] = images
+        if videos:
+            inputs["multi_modal_data"]["video"] = videos
+        if audios:
+            inputs["multi_modal_data"]["audio"] = audios
+        if "omni" in self.model_family.model_ability:
+            inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
+        if inputs["multi_modal_data"] == {}:
+            inputs.pop("multi_modal_data")
+        if inputs["mm_processor_kwargs"] == {}:
+            inputs.pop("mm_processor_kwargs")
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
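The rewritten chat path builds one `inputs` dict, attaches `multi_modal_data` entries only for the modalities that were actually extracted, and strips empty sections so text-only prompts stay plain. A compact sketch of that assembly step (an illustrative helper, not the xinference method):

    # Sketch only: attach multimodal payloads only when present and drop empty sections,
    # mirroring the {"prompt", "multi_modal_data", "mm_processor_kwargs"} shape used above.
    def build_inputs(prompt, images=None, videos=None, audios=None, use_audio_in_video=False):
        inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
        if images:
            inputs["multi_modal_data"]["image"] = images
        if videos:
            inputs["multi_modal_data"]["video"] = videos
        if audios:
            inputs["multi_modal_data"]["audio"] = audios
        if use_audio_in_video:
            inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
        for key in ("multi_modal_data", "mm_processor_kwargs"):
            if not inputs[key]:
                inputs.pop(key)
        return inputs

    print(build_inputs("Describe the image", images=["<PIL.Image>"]))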
xinference/model/utils.py
CHANGED
@@ -315,6 +315,11 @@ def set_all_random_seed(seed: int):
 
 
 class CancellableDownloader:
+    _global_lock = threading.Lock()
+    _active_instances = 0
+    _original_update = None # Class-level original update method
+    _patch_lock = threading.Lock() # Additional lock for patching operations
+
     def __init__(
         self,
         cancel_error_cls: Type[BaseException] = asyncio.CancelledError,
@@ -325,23 +330,23 @@ class CancellableDownloader:
         self._cancelled = threading.Event()
         self._done_event = threading.Event()
         self._cancel_error_cls = cancel_error_cls
-        self._original_update = None
         # progress for tqdm that is main
         self._main_progresses: Set[tqdm] = set()
         # progress for file downloader
         # mainly when tqdm unit is set
         self._download_progresses: Set[tqdm] = set()
-        # tqdm
-        self.
+        # Instance-specific tqdm tracking
+        self._patched_instances: Set[int] = set()
 
     def reset(self):
         self._main_progresses.clear()
         self._download_progresses.clear()
 
     def get_progress(self) -> float:
-        if self.
-            # directly return 1.0 when
+        if self.done:
+            # directly return 1.0 when finished
             return 1.0
+        # Don't return 1.0 when cancelled, calculate actual progress
 
         tasks = finished_tasks = 0
         for main_progress in self._main_progresses:
@@ -376,6 +381,7 @@ class CancellableDownloader:
 
     def cancel(self):
         self._cancelled.set()
+        self._done_event.set()
 
     @property
     def cancelled(self):
@@ -392,39 +398,76 @@ class CancellableDownloader:
         raise self._cancel_error_cls(error_msg)
 
     def patch_tqdm(self):
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Use class-level patching to avoid conflicts
+        with self._patch_lock:
+            if self._original_update is None:
+                self._original_update = original_update = tqdm.update
+
+                # Thread-safe patched update
+                def patched_update(tqdm_instance, n):
+                    import gc
+
+                    # Get all CancellableDownloader instances and check for cancellation
+                    downloaders = [
+                        obj
+                        for obj in gc.get_objects()
+                        if isinstance(obj, CancellableDownloader)
+                    ]
+
+                    for downloader in downloaders:
+                        # if download cancelled, throw error
+                        if getattr(downloader, "cancelled", False):
+                            downloader.raise_error()
+
+                        progresses = None
+                        if not getattr(tqdm_instance, "disable", False):
+                            unit = getattr(tqdm_instance, "unit", "it")
+                            if unit == "it":
+                                progresses = getattr(
+                                    downloader, "_main_progresses", None
+                                )
+                            else:
+                                progresses = getattr(
+                                    downloader, "_download_progresses", None
+                                )
+
+                        if progresses is not None:
+                            progresses.add(tqdm_instance)
+                        else:
+                            logger.debug(
+                                f"No progresses found for downloader {downloader}"
+                            )
+
+                    # Call original update with safety check
+                    return original_update(tqdm_instance, n)
+
+                tqdm.update = patched_update
 
     def unpatch_tqdm(self):
-
-
-
-
+        with self._patch_lock:
+            if self._original_update is not None and self._active_instances == 0:
+                tqdm.update = self._original_update
+                self._original_update = None
 
     def __enter__(self):
-
+        # Use global lock to prevent concurrent patching
+        with self._global_lock:
+            if self._active_instances == 0:
+                self.patch_tqdm()
+            self._active_instances += 1
        return self
 
    def __exit__(self, exc_type, exc_val, exc_tb):
-
-        self.
-
+        # Use global lock to prevent concurrent unpatching
+        with self._global_lock:
+            self._active_instances -= 1
+            if self._active_instances == 0:
+                self.unpatch_tqdm()
+        try:
+            self._done_event.set()
+            self.reset()
+        except Exception as e:
+            logger.debug(f"Error during CancellableDownloader cleanup: {e}")
 
 
 def get_engine_params_by_name(
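`CancellableDownloader` now patches `tqdm.update` once at class level and restores it only when the last active instance exits, guarded by locks and an instance counter. A minimal sketch of reference-counted monkey-patching in that style (assumes tqdm is installed; the class below is illustrative, not the xinference implementation):

    # Sketch only: reference-counted monkey-patching, so nested or concurrent contexts
    # patch tqdm.update once and restore it only when the last user exits.
    import threading
    from tqdm import tqdm

    class TqdmPatch:
        _lock = threading.Lock()
        _active = 0
        _original_update = None

        def __enter__(self):
            with TqdmPatch._lock:
                if TqdmPatch._active == 0:
                    TqdmPatch._original_update = tqdm.update

                    def patched(instance, n=1):
                        # hook point: inspect progress, check for cancellation, etc.
                        return TqdmPatch._original_update(instance, n)

                    tqdm.update = patched
                TqdmPatch._active += 1
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            with TqdmPatch._lock:
                TqdmPatch._active -= 1
                if TqdmPatch._active == 0 and TqdmPatch._original_update is not None:
                    tqdm.update = TqdmPatch._original_update
                    TqdmPatch._original_update = None

    with TqdmPatch():
        for _ in tqdm(range(3)):
            pass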
xinference/thirdparty/audiotools/core/audio_signal.py
CHANGED
@@ -41,7 +41,7 @@ window_length : int, optional
 hop_length : int, optional
     Hop length of STFT, by default ``window_length // 4``.
 window_type : str, optional
-    Type of window to use, by default ``sqrt
+    Type of window to use, by default ``sqrt\\_hann``.
 match_stride : bool, optional
     Whether to match the stride of convolutional layers, by default False
 padding_type : str, optional
@@ -1011,7 +1011,7 @@ class AudioSignal(
 def get_window(window_type: str, window_length: int, device: str):
     """Wrapper around scipy.signal.get_window so one can also get the
     popular sqrt-hann window. This function caches for efficiency
-    using functools.lru
+    using functools.lru\\_cache.
 
     Parameters
     ----------
@@ -1089,7 +1089,7 @@ class AudioSignal(
     def compute_stft_padding(
         self, window_length: int, hop_length: int, match_stride: bool
     ):
-        """Compute how the STFT should be padded, based on match
+        """Compute how the STFT should be padded, based on match\\_stride.
 
         Parameters
         ----------
@@ -1138,7 +1138,7 @@ class AudioSignal(
         hop_length : int, optional
             Hop length of STFT, by default ``window_length // 4``.
         window_type : str, optional
-            Type of window to use, by default ``sqrt
+            Type of window to use, by default ``sqrt\\_hann``.
         match_stride : bool, optional
             Whether to match the stride of convolutional layers, by default False
         padding_type : str, optional
@@ -1219,7 +1219,7 @@ class AudioSignal(
         match_stride: bool = None,
         length: int = None,
     ):
-        """Computes inverse STFT and sets it to audio
+        """Computes inverse STFT and sets it to audio\\_data.
 
         Parameters
         ----------
@@ -1228,7 +1228,7 @@ class AudioSignal(
         hop_length : int, optional
             Hop length of STFT, by default ``window_length // 4``.
         window_type : str, optional
-            Type of window to use, by default ``sqrt
+            Type of window to use, by default ``sqrt\\_hann``.
         match_stride : bool, optional
             Whether to match the stride of convolutional layers, by default False
         length : int, optional
xinference/thirdparty/melo/text/chinese_mix.py
CHANGED
@@ -209,13 +209,13 @@ def _g2p_v2(segments):
     for text in segments:
         assert spliter not in text
         # replace all english words
-        text = re.sub('([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
+        text = re.sub(r'([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
         texts = text.split(spliter)
         texts = [t for t in texts if len(t) > 0]
 
 
        for text in texts:
-            if re.match('[a-zA-Z\s]+', text):
+            if re.match(r'[a-zA-Z\s]+', text):
                 # english
                 tokenized_en = tokenizer.tokenize(text)
                 phones_en, tones_en, word2ph_en = g2p_en(text=None, pad_start_end=False, tokenized=tokenized_en)
xinference/types.py
CHANGED
@@ -47,6 +47,15 @@ class ImageList(TypedDict):
     data: List[Image]
 
 
+class ImageEditRequest(TypedDict, total=False):
+    image: Union[Union[str, bytes], List[Union[str, bytes]]]
+    mask: Optional[Union[str, bytes]]
+    prompt: str
+    n: int
+    size: Optional[str]
+    response_format: str
+
+
 class SDAPIResult(TypedDict):
     images: List[str]
     parameters: dict
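`ImageEditRequest` is declared with `total=False`, so every key is optional at the type level and the REST layer is left to validate required fields. A hedged example of constructing such a payload (the field values below are illustrative):

    # Sketch only: building an ImageEditRequest-shaped payload.
    from typing import List, Optional, TypedDict, Union

    class ImageEditRequest(TypedDict, total=False):
        image: Union[Union[str, bytes], List[Union[str, bytes]]]
        mask: Optional[Union[str, bytes]]
        prompt: str
        n: int
        size: Optional[str]
        response_format: str

    request: ImageEditRequest = {
        "image": "input.png",  # path, URL, or raw bytes
        "prompt": "replace the sky with a sunset",
        "n": 1,
        "size": "1024x1024",
        "response_format": "b64_json",
    }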
xinference/ui/web/ui/build/asset-manifest.json
CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.5ea97072.css",
-    "main.js": "./static/js/main.
+    "main.js": "./static/js/main.45e78536.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.5ea97072.css.map": "./static/css/main.5ea97072.css.map",
-    "main.
+    "main.45e78536.js.map": "./static/js/main.45e78536.js.map"
   },
   "entrypoints": [
     "static/css/main.5ea97072.css",
-    "static/js/main.
+    "static/js/main.45e78536.js"
   ]
 }
xinference/ui/web/ui/build/index.html
CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.45e78536.js"></script><link href="./static/css/main.5ea97072.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>