xinference 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +462 -3
  3. xinference/client/restful/async_restful_client.py +158 -5
  4. xinference/client/restful/restful_client.py +131 -0
  5. xinference/core/supervisor.py +12 -0
  6. xinference/model/audio/model_spec.json +20 -20
  7. xinference/model/image/model_spec.json +159 -159
  8. xinference/model/llm/__init__.py +2 -2
  9. xinference/model/llm/llm_family.json +843 -180
  10. xinference/model/llm/mlx/distributed_models/core.py +41 -0
  11. xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
  12. xinference/model/llm/sglang/core.py +20 -6
  13. xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
  14. xinference/model/llm/transformers/chatglm.py +3 -0
  15. xinference/model/llm/transformers/core.py +129 -36
  16. xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
  17. xinference/model/llm/transformers/utils.py +23 -0
  18. xinference/model/llm/utils.py +37 -24
  19. xinference/model/llm/vllm/core.py +128 -69
  20. xinference/model/utils.py +74 -31
  21. xinference/thirdparty/audiotools/core/audio_signal.py +6 -6
  22. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
  23. xinference/thirdparty/melo/text/chinese_mix.py +2 -2
  24. xinference/types.py +9 -0
  25. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  26. xinference/ui/web/ui/build/index.html +1 -1
  27. xinference/ui/web/ui/build/static/js/{main.d192c4f3.js → main.45e78536.js} +3 -3
  28. xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
  29. xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
  30. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/METADATA +7 -5
  31. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/RECORD +36 -35
  32. xinference/ui/web/ui/build/static/js/main.d192c4f3.js.map +0 -1
  33. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +0 -1
  34. /xinference/ui/web/ui/build/static/js/{main.d192c4f3.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -0
  35. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
  36. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
  37. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
  38. {xinference-1.10.1.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py CHANGED
@@ -131,7 +131,7 @@ except ImportError:
  VLLM_INSTALLED = False
  VLLM_VERSION = None

- VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
+ VLLM_SUPPORTED_MULTI_MODEL_LIST: List[str] = []
  VLLM_SUPPORTED_MODELS = [
  "llama-2",
  "llama-3",
@@ -229,34 +229,37 @@ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.5.3"):
  VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.1"):
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL3")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("internvl2")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5-MPO")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL3")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.2"):
  VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.3"):
  VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("QvQ-72B-Preview")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("llama-3.2-vision-instruct")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-vl-instruct")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("QvQ-72B-Preview")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.0"):
  VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.2"):
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-vl-instruct")
  VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio-instruct")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.3"):
  VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")
  VLLM_SUPPORTED_CHAT_MODELS.append("qwenLong-l1")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-omni")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.0"):
  VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("gemma-3-it")

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
  VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
@@ -272,7 +275,7 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
  VLLM_SUPPORTED_CHAT_MODELS.append("Ernie4.5")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.1v-thinking")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.1v-thinking")
  VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
  VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
  VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
@@ -280,7 +283,7 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):

  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
  VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.5v")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.5v")
  VLLM_SUPPORTED_CHAT_MODELS.append("KAT-V1")

  if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
@@ -291,9 +294,11 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
  VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
  VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")

- if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
- VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")
+ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Thinking")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Instruct")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Thinking")
+ VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Instruct")


  class VLLMModel(LLM):
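
Note: the block above gates each model family on the installed vLLM release using packaging.version comparisons. A minimal standalone sketch of the same pattern follows; the list name and the appended families are illustrative stand-ins, not the exact xinference globals.

from packaging import version

try:
    import vllm
    VLLM_INSTALLED = True
    VLLM_VERSION = version.parse(vllm.__version__)
except ImportError:
    VLLM_INSTALLED = False
    VLLM_VERSION = None

SUPPORTED_MULTI_MODELS = []  # hypothetical stand-in for VLLM_SUPPORTED_MULTI_MODEL_LIST

# Only advertise a family when the detected engine is new enough to serve it.
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
    SUPPORTED_MULTI_MODELS.extend(["Qwen3-VL-Instruct", "Qwen3-Omni-Instruct"])
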
@@ -545,7 +550,7 @@ class VLLMModel(LLM):
  # patch vllm Executor.get_class
  Executor.get_class = lambda vllm_config: executor_cls
  self._engine = AsyncLLMEngine.from_engine_args(engine_args)
- except:
+ except: # noqa: E722
  logger.exception("Creating vllm engine failed")
  self._loading_error = sys.exc_info()

@@ -714,7 +719,7 @@ class VLLMModel(LLM):
  logger.info("Detecting vLLM is not health, prepare to quit the process")
  try:
  self.stop()
- except:
+ except: # noqa: E722
  # ignore error when stop
  pass
  # Just kill the process and let xinference auto-recover the model
@@ -857,7 +862,7 @@ class VLLMModel(LLM):
  if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
  return False
  if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
+ if quantization != "none" and quantization is not None:
  return False
  if llm_spec.model_format == "awq":
  # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
@@ -988,7 +993,10 @@ class VLLMModel(LLM):
  from vllm import TokensPrompt

  token_ids = await asyncio.to_thread(
- self._tokenize, tokenizer, prompt, config # type: ignore
+ self._tokenize,
+ tokenizer,
+ prompt, # type: ignore
+ config,
  )
  return TokensPrompt(prompt_token_ids=token_ids)

@@ -1082,18 +1090,43 @@ class VLLMModel(LLM):
  logger.warning(f"Failed to create GuidedDecodingParams: {e}")
  guided_options = None

- # Use structured_outputs for vLLM >= 0.11.0, guided_decoding for older versions
- if (
- VLLM_VERSION >= version.parse("0.11.0")
- or VLLM_VERSION.base_version >= "0.11.0"
- ):
- sampling_params = SamplingParams(
- structured_outputs=guided_options, **sanitized_generate_config
- )
- else:
- sampling_params = SamplingParams(
- guided_decoding=guided_options, **sanitized_generate_config
+ try:
+ import inspect
+
+ sp_sig = inspect.signature(SamplingParams)
+ # For v0.9.2 and similar versions, prioritize guided_decoding over structured_outputs
+ # structured_outputs was introduced later (around v0.11.0) and may not accept
+ # GuidedDecodingParams in earlier versions even if the parameter exists
+ if "guided_decoding" in sp_sig.parameters:
+ sampling_params = SamplingParams(
+ guided_decoding=guided_options, **sanitized_generate_config
+ )
+ elif "structured_outputs" in sp_sig.parameters:
+ try:
+ sampling_params = SamplingParams(
+ structured_outputs=guided_options,
+ **sanitized_generate_config,
+ )
+ except TypeError as e:
+ if "structured_outputs" in str(e):
+ # structured_outputs parameter exists but doesn't accept GuidedDecodingParams
+ # Fall back to no guided decoding
+ logger.warning(
+ f"structured_outputs parameter failed: {e}. "
+ "Falling back to no guided decoding for vLLM version compatibility."
+ )
+ sampling_params = SamplingParams(
+ **sanitized_generate_config
+ )
+ else:
+ raise
+ else:
+ sampling_params = SamplingParams(**sanitized_generate_config)
+ except Exception as e:
+ logger.warning(
+ f"Failed to create SamplingParams with guided decoding: {e}"
  )
+ sampling_params = SamplingParams(**sanitized_generate_config)
  else:
  # ignore generate configs for older versions
  sanitized_generate_config.pop("guided_json", None)
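
Note: rather than comparing version strings, the new code introspects SamplingParams to find out which guided-decoding keyword the installed vLLM actually accepts. A reduced sketch of that detection pattern, using a stand-in class instead of the real vLLM types:

import inspect

class FakeSamplingParams:
    # Stand-in for an older vLLM SamplingParams that still takes guided_decoding.
    def __init__(self, guided_decoding=None, temperature=1.0):
        self.guided_decoding = guided_decoding
        self.temperature = temperature

def build_sampling_params(cls, guided_options, **config):
    """Pick the guided-decoding keyword based on the constructor signature."""
    params = inspect.signature(cls).parameters
    if "guided_decoding" in params:
        return cls(guided_decoding=guided_options, **config)
    if "structured_outputs" in params:
        return cls(structured_outputs=guided_options, **config)
    return cls(**config)  # no guided decoding supported at all

sp = build_sampling_params(FakeSamplingParams, {"json": {}}, temperature=0.7)
print(sp.guided_decoding)  # {'json': {}}
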
@@ -1111,7 +1144,9 @@ class VLLMModel(LLM):
  # this requires tokenizing
  tokenizer = await self._get_tokenizer(lora_request)
  prompt_or_token_ids = await self._gen_tokens_prompt(
- tokenizer, prompt, sanitized_generate_config # type: ignore
+ tokenizer,
+ prompt,
+ sanitized_generate_config, # type: ignore
  )
  sampling_params.max_tokens = max_tokens = self._context_length - len( # type: ignore
  prompt_or_token_ids["prompt_token_ids"] # type: ignore
@@ -1266,11 +1301,10 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  ]:
  return False
  if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
+ if quantization != "none" and quantization is not None:
  return False
  if llm_spec.model_format == "awq":
- # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
- if "4" not in quantization:
+ if not any(q in quantization for q in ("4", "8")):
  return False
  if llm_spec.model_format == "gptq":
  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1430,7 +1464,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  return self._to_chat_completion(c, self.reasoning_parser)


- class VLLMVisionModel(VLLMModel, ChatModelMixin):
+ class VLLMMultiModel(VLLMModel, ChatModelMixin):
  @classmethod
  def match_json(
  cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
@@ -1442,11 +1476,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
  return False
  if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
+ if quantization != "none" and quantization is not None:
  return False
  if llm_spec.model_format == "awq":
- # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
- if "4" not in quantization:
+ if not any(q in quantization for q in ("4", "8")):
  return False
  if llm_spec.model_format == "gptq":
  if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
@@ -1456,12 +1489,16 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  if "4" not in quantization:
  return False
  if isinstance(llm_family, CustomLLMFamilyV2):
- if llm_family.model_family not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+ if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
  return False
  else:
- if llm_family.model_name not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+ if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
  return False
- if "vision" not in llm_family.model_ability:
+ if (
+ "vision" not in llm_family.model_ability
+ and "audio" not in llm_family.model_ability
+ and "omni" not in llm_family.model_ability
+ ):
  return False
  return VLLM_INSTALLED

@@ -1470,13 +1507,21 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  ) -> VLLMModelConfig:
  model_config = super()._sanitize_model_config(model_config)
  if VLLM_VERSION >= version.parse("0.5.5"):
- model_config["limit_mm_per_prompt"] = (
- json.loads(model_config.get("limit_mm_per_prompt")) # type: ignore
- if model_config.get("limit_mm_per_prompt")
- else {
- "image": 2, # default 2 images all chat
- }
- )
+ if model_config.get("limit_mm_per_prompt"):
+ model_config["limit_mm_per_prompt"] = json.loads(
+ model_config.get("limit_mm_per_prompt") # type: ignore
+ )
+ else:
+ if "omni" in self.model_family.model_ability:
+ model_config["limit_mm_per_prompt"] = {
+ "image": 2,
+ "video": 2,
+ "audio": 2,
+ }
+ elif "vision" in self.model_family.model_ability:
+ model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
+ elif "audio" in self.model_family.model_ability:
+ model_config["limit_mm_per_prompt"] = {"audio": 2}
  return model_config

  def _sanitize_chat_config(
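
Note: limit_mm_per_prompt is accepted as a JSON string and parsed before it is handed to vLLM; when it is absent, the default is now derived from the model's abilities. A small sketch of that resolution logic under those assumptions (the function name is illustrative):

import json

def resolve_limit_mm_per_prompt(raw, abilities):
    """Parse a user-supplied JSON limit or fall back to ability-based defaults."""
    if raw:
        return json.loads(raw)  # e.g. '{"image": 4, "video": 1}'
    if "omni" in abilities:
        return {"image": 2, "video": 2, "audio": 2}
    if "vision" in abilities:
        return {"image": 2, "video": 2}
    if "audio" in abilities:
        return {"audio": 2}
    return {}

print(resolve_limit_mm_per_prompt(None, ["vision", "chat"]))  # {'image': 2, 'video': 2}
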
@@ -1510,7 +1555,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  multi_modal_data = prompt.get("multi_modal_data")

  token_ids = await asyncio.to_thread(
- self._tokenize, tokenizer, prompt_str, config # type: ignore
+ self._tokenize,
+ tokenizer,
+ prompt_str,
+ config, # type: ignore
  )
  return TokensPrompt(
  prompt_token_ids=token_ids, multi_modal_data=multi_modal_data
@@ -1526,9 +1574,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  tools = generate_config.pop("tools", []) if generate_config else None

  model_family = self.model_family.model_family or self.model_family.model_name
-
+ audios, images, videos = None, None, None
  if "internvl" not in model_family.lower():
- from qwen_vl_utils import process_vision_info
+ from qwen_omni_utils import (
+ process_audio_info,
+ process_mm_info,
+ process_vision_info,
+ )

  messages = self._transform_messages(messages)

@@ -1543,29 +1595,36 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
  if tools and model_family in QWEN_TOOL_CALL_FAMILY:
  full_context_kwargs["tools"] = tools
  assert self.model_family.chat_template is not None
+ if "omni" in self.model_family.model_ability:
+ audios, images, videos = process_mm_info(
+ messages, use_audio_in_video=True
+ )
+ elif "audio" in self.model_family.model_ability:
+ audios = process_audio_info(messages, use_audio_in_video=False)
+ elif "vision" in self.model_family.model_ability:
+ images, videos = process_vision_info( # type: ignore
+ messages, return_video_kwargs=False
+ )
+
  prompt = self.get_full_context(
  messages, self.model_family.chat_template, **full_context_kwargs
  )
- images, video_inputs = process_vision_info(messages)
- if video_inputs:
- raise ValueError("Not support video input now.")
- else:
- prompt, images = self.get_specific_prompt(model_family, messages)

- if not images:
- inputs = {
- "prompt": prompt,
- }
- elif len(images) == 1:
- inputs = {
- "prompt": prompt,
- "multi_modal_data": {"image": images[-1]}, # type: ignore
- }
  else:
- inputs = {
- "prompt": prompt,
- "multi_modal_data": {"image": images}, # type: ignore
- }
+ prompt, images = self.get_specific_prompt(model_family, messages)
+ inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
+ if images:
+ inputs["multi_modal_data"]["image"] = images
+ if videos:
+ inputs["multi_modal_data"]["video"] = videos
+ if audios:
+ inputs["multi_modal_data"]["audio"] = audios
+ if "omni" in self.model_family.model_ability:
+ inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
+ if inputs["multi_modal_data"] == {}:
+ inputs.pop("multi_modal_data")
+ if inputs["mm_processor_kwargs"] == {}:
+ inputs.pop("mm_processor_kwargs")
  generate_config = self._sanitize_chat_config(generate_config)

  stream = generate_config.get("stream", None)
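
Note: after rendering the chat template, the new code attaches only the multimodal sections that are actually populated and drops the keys entirely when they are empty. A condensed sketch of that assembly step; the helper name and sample inputs are illustrative:

def build_vllm_inputs(prompt, images=None, videos=None, audios=None, omni=False):
    """Assemble a vLLM prompt dict, dropping empty multimodal sections."""
    inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
    if images:
        inputs["multi_modal_data"]["image"] = images
    if videos:
        inputs["multi_modal_data"]["video"] = videos
    if audios:
        inputs["multi_modal_data"]["audio"] = audios
    if omni:
        inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
    # Remove sections that ended up empty instead of forwarding them.
    for key in ("multi_modal_data", "mm_processor_kwargs"):
        if not inputs[key]:
            inputs.pop(key)
    return inputs

print(build_vllm_inputs("Describe the image.", images=["<decoded image object>"]))
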
xinference/model/utils.py CHANGED
@@ -315,6 +315,11 @@ def set_all_random_seed(seed: int):


  class CancellableDownloader:
+ _global_lock = threading.Lock()
+ _active_instances = 0
+ _original_update = None # Class-level original update method
+ _patch_lock = threading.Lock() # Additional lock for patching operations
+
  def __init__(
  self,
  cancel_error_cls: Type[BaseException] = asyncio.CancelledError,
@@ -325,23 +330,23 @@ class CancellableDownloader:
  self._cancelled = threading.Event()
  self._done_event = threading.Event()
  self._cancel_error_cls = cancel_error_cls
- self._original_update = None
  # progress for tqdm that is main
  self._main_progresses: Set[tqdm] = set()
  # progress for file downloader
  # mainly when tqdm unit is set
  self._download_progresses: Set[tqdm] = set()
- # tqdm original update
- self._original_tqdm_update = None
+ # Instance-specific tqdm tracking
+ self._patched_instances: Set[int] = set()

  def reset(self):
  self._main_progresses.clear()
  self._download_progresses.clear()

  def get_progress(self) -> float:
- if self.cancelled or self.done:
- # directly return 1.0 when cancelled or finished
+ if self.done:
+ # directly return 1.0 when finished
  return 1.0
+ # Don't return 1.0 when cancelled, calculate actual progress

  tasks = finished_tasks = 0
  for main_progress in self._main_progresses:
@@ -376,6 +381,7 @@

  def cancel(self):
  self._cancelled.set()
+ self._done_event.set()

  @property
  def cancelled(self):
@@ -392,39 +398,76 @@
  raise self._cancel_error_cls(error_msg)

  def patch_tqdm(self):
- # patch tqdm
- # raise error if cancelled
- self._original_update = original_update = tqdm.update
- downloader = self
-
- def patched_update(self, n):
- if downloader.cancelled:
- downloader.raise_error()
- if not self.disable:
- progresses = (
- downloader._main_progresses
- if getattr(self, "unit", "it") == "it"
- else downloader._download_progresses
- )
- progresses.add(self)
- return original_update(self, n)
-
- tqdm.update = patched_update
+ # Use class-level patching to avoid conflicts
+ with self._patch_lock:
+ if self._original_update is None:
+ self._original_update = original_update = tqdm.update
+
+ # Thread-safe patched update
+ def patched_update(tqdm_instance, n):
+ import gc
+
+ # Get all CancellableDownloader instances and check for cancellation
+ downloaders = [
+ obj
+ for obj in gc.get_objects()
+ if isinstance(obj, CancellableDownloader)
+ ]
+
+ for downloader in downloaders:
+ # if download cancelled, throw error
+ if getattr(downloader, "cancelled", False):
+ downloader.raise_error()
+
+ progresses = None
+ if not getattr(tqdm_instance, "disable", False):
+ unit = getattr(tqdm_instance, "unit", "it")
+ if unit == "it":
+ progresses = getattr(
+ downloader, "_main_progresses", None
+ )
+ else:
+ progresses = getattr(
+ downloader, "_download_progresses", None
+ )
+
+ if progresses is not None:
+ progresses.add(tqdm_instance)
+ else:
+ logger.debug(
+ f"No progresses found for downloader {downloader}"
+ )
+
+ # Call original update with safety check
+ return original_update(tqdm_instance, n)
+
+ tqdm.update = patched_update

  def unpatch_tqdm(self):
- from tqdm.auto import tqdm
-
- if self._original_update:
- tqdm.update = self._original_update
+ with self._patch_lock:
+ if self._original_update is not None and self._active_instances == 0:
+ tqdm.update = self._original_update
+ self._original_update = None

  def __enter__(self):
- self.patch_tqdm()
+ # Use global lock to prevent concurrent patching
+ with self._global_lock:
+ if self._active_instances == 0:
+ self.patch_tqdm()
+ self._active_instances += 1
  return self

  def __exit__(self, exc_type, exc_val, exc_tb):
- self.unpatch_tqdm()
- self._done_event.set()
- self.reset()
+ # Use global lock to prevent concurrent unpatching
+ with self._global_lock:
+ self._active_instances -= 1
+ if self._active_instances == 0:
+ self.unpatch_tqdm()
+ try:
+ self._done_event.set()
+ self.reset()
+ except Exception as e:
+ logger.debug(f"Error during CancellableDownloader cleanup: {e}")


  def get_engine_params_by_name(
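
Note: the downloader now keeps class-level state (_active_instances, _original_update) so the tqdm patch is installed by the first active instance and removed only when the last one exits. A hedged usage sketch, assuming the class remains importable from xinference.model.utils:

from xinference.model.utils import CancellableDownloader

downloader = CancellableDownloader()
with downloader:  # the first active instance installs the shared tqdm.update patch
    # run a hub download here; any tqdm bars it creates are tracked by the downloader
    print(downloader.get_progress())  # fraction of finished tasks, 1.0 once done
    # downloader.cancel() would set both the cancelled and done events, making the
    # patched tqdm.update raise inside the downloading thread
# exiting decrements the active-instance count; the patch is removed when it hits zero
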
xinference/thirdparty/audiotools/core/audio_signal.py CHANGED
@@ -41,7 +41,7 @@ window_length : int, optional
  hop_length : int, optional
  Hop length of STFT, by default ``window_length // 4``.
  window_type : str, optional
- Type of window to use, by default ``sqrt\_hann``.
+ Type of window to use, by default ``sqrt\\_hann``.
  match_stride : bool, optional
  Whether to match the stride of convolutional layers, by default False
  padding_type : str, optional
@@ -1011,7 +1011,7 @@ class AudioSignal(
  def get_window(window_type: str, window_length: int, device: str):
  """Wrapper around scipy.signal.get_window so one can also get the
  popular sqrt-hann window. This function caches for efficiency
- using functools.lru\_cache.
+ using functools.lru\\_cache.

  Parameters
  ----------
@@ -1089,7 +1089,7 @@ class AudioSignal(
  def compute_stft_padding(
  self, window_length: int, hop_length: int, match_stride: bool
  ):
- """Compute how the STFT should be padded, based on match\_stride.
+ """Compute how the STFT should be padded, based on match\\_stride.

  Parameters
  ----------
@@ -1138,7 +1138,7 @@ class AudioSignal(
  hop_length : int, optional
  Hop length of STFT, by default ``window_length // 4``.
  window_type : str, optional
- Type of window to use, by default ``sqrt\_hann``.
+ Type of window to use, by default ``sqrt\\_hann``.
  match_stride : bool, optional
  Whether to match the stride of convolutional layers, by default False
  padding_type : str, optional
@@ -1219,7 +1219,7 @@ class AudioSignal(
  match_stride: bool = None,
  length: int = None,
  ):
- """Computes inverse STFT and sets it to audio\_data.
+ """Computes inverse STFT and sets it to audio\\_data.

  Parameters
  ----------
@@ -1228,7 +1228,7 @@ class AudioSignal(
  hop_length : int, optional
  Hop length of STFT, by default ``window_length // 4``.
  window_type : str, optional
- Type of window to use, by default ``sqrt\_hann``.
+ Type of window to use, by default ``sqrt\\_hann``.
  match_stride : bool, optional
  Whether to match the stride of convolutional layers, by default False
  length : int, optional
xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py CHANGED
@@ -68,7 +68,7 @@ class Text:
  pattern = re.compile(
  r"\D+((\d+(\.\d+)?)[多余几]?"
  + CURRENCY_UNITS
- + "(\d"
+ + R"(\d"
  + CURRENCY_UNITS
  + "?)?)"
  )
xinference/thirdparty/melo/text/chinese_mix.py CHANGED
@@ -209,13 +209,13 @@ def _g2p_v2(segments):
  for text in segments:
  assert spliter not in text
  # replace all english words
- text = re.sub('([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
+ text = re.sub(r'([a-zA-Z\s]+)', lambda x: f'{spliter}{x.group(1)}{spliter}', text)
  texts = text.split(spliter)
  texts = [t for t in texts if len(t) > 0]


  for text in texts:
- if re.match('[a-zA-Z\s]+', text):
+ if re.match(r'[a-zA-Z\s]+', text):
  # english
  tokenized_en = tokenizer.tokenize(text)
  phones_en, tones_en, word2ph_en = g2p_en(text=None, pad_start_end=False, tokenized=tokenized_en)
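
Note: both changes in this file switch regex patterns to raw string literals; sequences like \s and \d are not valid Python string escapes, and newer interpreters emit a warning for them in plain literals. A minimal illustration:

import re

# Preferred form: the raw string passes the backslash through to the regex engine untouched.
print(re.match(r'[a-zA-Z\s]+', "hello world").group(0))  # 'hello world'
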
xinference/types.py CHANGED
@@ -47,6 +47,15 @@ class ImageList(TypedDict):
  data: List[Image]


+ class ImageEditRequest(TypedDict, total=False):
+ image: Union[Union[str, bytes], List[Union[str, bytes]]]
+ mask: Optional[Union[str, bytes]]
+ prompt: str
+ n: int
+ size: Optional[str]
+ response_format: str
+
+
  class SDAPIResult(TypedDict):
  images: List[str]
  parameters: dict
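
Note: ImageEditRequest is declared with total=False, so callers supply only the fields they need. A hedged construction example; the path and prompt are illustrative:

from xinference.types import ImageEditRequest

request: ImageEditRequest = {
    "image": "/tmp/input.png",  # str or bytes; a list of images is also accepted
    "prompt": "Replace the sky with a sunset",
    "n": 1,
    "response_format": "b64_json",
}
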
xinference/ui/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
  {
  "files": {
  "main.css": "./static/css/main.5ea97072.css",
- "main.js": "./static/js/main.d192c4f3.js",
+ "main.js": "./static/js/main.45e78536.js",
  "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
  "index.html": "./index.html",
  "main.5ea97072.css.map": "./static/css/main.5ea97072.css.map",
- "main.d192c4f3.js.map": "./static/js/main.d192c4f3.js.map"
+ "main.45e78536.js.map": "./static/js/main.45e78536.js.map"
  },
  "entrypoints": [
  "static/css/main.5ea97072.css",
- "static/js/main.d192c4f3.js"
+ "static/js/main.45e78536.js"
  ]
  }
xinference/ui/web/ui/build/index.html CHANGED
@@ -1 +1 @@
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.d192c4f3.js"></script><link href="./static/css/main.5ea97072.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.45e78536.js"></script><link href="./static/css/main.5ea97072.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>