xinference 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +473 -31
- xinference/client/restful/async_restful_client.py +178 -8
- xinference/client/restful/restful_client.py +151 -3
- xinference/core/supervisor.py +99 -53
- xinference/core/worker.py +10 -0
- xinference/deploy/cmdline.py +15 -0
- xinference/model/audio/core.py +21 -6
- xinference/model/audio/indextts2.py +166 -0
- xinference/model/audio/model_spec.json +58 -21
- xinference/model/image/model_spec.json +159 -90
- xinference/model/image/stable_diffusion/core.py +13 -4
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1299 -174
- xinference/model/llm/mlx/distributed_models/core.py +41 -0
- xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
- xinference/model/llm/sglang/core.py +44 -11
- xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +94 -32
- xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
- xinference/model/llm/transformers/chatglm.py +3 -0
- xinference/model/llm/transformers/core.py +129 -36
- xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +34 -8
- xinference/model/llm/transformers/utils.py +23 -0
- xinference/model/llm/utils.py +48 -32
- xinference/model/llm/vllm/core.py +207 -72
- xinference/model/utils.py +74 -31
- xinference/thirdparty/audiotools/__init__.py +10 -0
- xinference/thirdparty/audiotools/core/__init__.py +4 -0
- xinference/thirdparty/audiotools/core/audio_signal.py +1682 -0
- xinference/thirdparty/audiotools/core/display.py +194 -0
- xinference/thirdparty/audiotools/core/dsp.py +390 -0
- xinference/thirdparty/audiotools/core/effects.py +647 -0
- xinference/thirdparty/audiotools/core/ffmpeg.py +211 -0
- xinference/thirdparty/audiotools/core/loudness.py +320 -0
- xinference/thirdparty/audiotools/core/playback.py +252 -0
- xinference/thirdparty/audiotools/core/templates/__init__.py +0 -0
- xinference/thirdparty/audiotools/core/templates/headers.html +322 -0
- xinference/thirdparty/audiotools/core/templates/pandoc.css +407 -0
- xinference/thirdparty/audiotools/core/templates/widget.html +52 -0
- xinference/thirdparty/audiotools/core/util.py +671 -0
- xinference/thirdparty/audiotools/core/whisper.py +97 -0
- xinference/thirdparty/audiotools/data/__init__.py +3 -0
- xinference/thirdparty/audiotools/data/datasets.py +517 -0
- xinference/thirdparty/audiotools/data/preprocess.py +81 -0
- xinference/thirdparty/audiotools/data/transforms.py +1592 -0
- xinference/thirdparty/audiotools/metrics/__init__.py +6 -0
- xinference/thirdparty/audiotools/metrics/distance.py +131 -0
- xinference/thirdparty/audiotools/metrics/quality.py +159 -0
- xinference/thirdparty/audiotools/metrics/spectral.py +247 -0
- xinference/thirdparty/audiotools/ml/__init__.py +5 -0
- xinference/thirdparty/audiotools/ml/accelerator.py +184 -0
- xinference/thirdparty/audiotools/ml/decorators.py +440 -0
- xinference/thirdparty/audiotools/ml/experiment.py +90 -0
- xinference/thirdparty/audiotools/ml/layers/__init__.py +2 -0
- xinference/thirdparty/audiotools/ml/layers/base.py +328 -0
- xinference/thirdparty/audiotools/ml/layers/spectral_gate.py +127 -0
- xinference/thirdparty/audiotools/post.py +140 -0
- xinference/thirdparty/audiotools/preference.py +600 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
- xinference/thirdparty/indextts/BigVGAN/ECAPA_TDNN.py +656 -0
- xinference/thirdparty/indextts/BigVGAN/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/activations.py +122 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/.gitignore +1 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/activation1d.py +76 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/anti_alias_activation_cuda.cu +256 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/load.py +121 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/act.py +31 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/filter.py +102 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/__init__.py +6 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/BigVGAN/alias_free_torch/resample.py +49 -0
- xinference/thirdparty/indextts/BigVGAN/bigvgan.py +534 -0
- xinference/thirdparty/indextts/BigVGAN/models.py +451 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/CNN.py +546 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/__init__.py +0 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/linear.py +89 -0
- xinference/thirdparty/indextts/BigVGAN/nnet/normalization.py +670 -0
- xinference/thirdparty/indextts/BigVGAN/utils.py +101 -0
- xinference/thirdparty/indextts/__init__.py +0 -0
- xinference/thirdparty/indextts/cli.py +65 -0
- xinference/thirdparty/indextts/gpt/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/__init__.py +0 -0
- xinference/thirdparty/indextts/gpt/conformer/attention.py +312 -0
- xinference/thirdparty/indextts/gpt/conformer/embedding.py +163 -0
- xinference/thirdparty/indextts/gpt/conformer/subsampling.py +348 -0
- xinference/thirdparty/indextts/gpt/conformer_encoder.py +520 -0
- xinference/thirdparty/indextts/gpt/model.py +713 -0
- xinference/thirdparty/indextts/gpt/model_v2.py +747 -0
- xinference/thirdparty/indextts/gpt/perceiver.py +317 -0
- xinference/thirdparty/indextts/gpt/transformers_beam_search.py +1013 -0
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +4747 -0
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +1878 -0
- xinference/thirdparty/indextts/gpt/transformers_modeling_utils.py +5525 -0
- xinference/thirdparty/indextts/infer.py +690 -0
- xinference/thirdparty/indextts/infer_v2.py +739 -0
- xinference/thirdparty/indextts/s2mel/dac/__init__.py +16 -0
- xinference/thirdparty/indextts/s2mel/dac/__main__.py +36 -0
- xinference/thirdparty/indextts/s2mel/dac/model/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/dac/model/base.py +294 -0
- xinference/thirdparty/indextts/s2mel/dac/model/dac.py +400 -0
- xinference/thirdparty/indextts/s2mel/dac/model/discriminator.py +228 -0
- xinference/thirdparty/indextts/s2mel/dac/model/encodec.py +320 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/__init__.py +3 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/layers.py +33 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/loss.py +368 -0
- xinference/thirdparty/indextts/s2mel/dac/nn/quantize.py +339 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/__init__.py +123 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/decode.py +95 -0
- xinference/thirdparty/indextts/s2mel/dac/utils/encode.py +94 -0
- xinference/thirdparty/indextts/s2mel/hf_utils.py +12 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/s2mel/modules/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/s2mel/modules/audio.py +82 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/activations.py +120 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/activation1d.py +77 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp +23 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu +246 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/compat.h +29 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/load.py +86 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/cuda/type_shim.h +92 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/__init__.py +6 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/act.py +30 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/filter.py +101 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/alias_free_activation/torch/resample.py +58 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/bigvgan.py +492 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/config.json +63 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/env.py +18 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/meldataset.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/bigvgan/utils.py +99 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/DTDNN.py +115 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/classifier.py +70 -0
- xinference/thirdparty/indextts/s2mel/modules/campplus/layers.py +253 -0
- xinference/thirdparty/indextts/s2mel/modules/commons.py +632 -0
- xinference/thirdparty/indextts/s2mel/modules/diffusion_transformer.py +257 -0
- xinference/thirdparty/indextts/s2mel/modules/encodec.py +292 -0
- xinference/thirdparty/indextts/s2mel/modules/flow_matching.py +171 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/generate.py +436 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/model.py +360 -0
- xinference/thirdparty/indextts/s2mel/modules/gpt_fast/quantize.py +622 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/indextts/s2mel/modules/hifigan/generator.py +454 -0
- xinference/thirdparty/indextts/s2mel/modules/layers.py +354 -0
- xinference/thirdparty/indextts/s2mel/modules/length_regulator.py +141 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/__init__.py +0 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/api.py +186 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/attentions.py +465 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/checkpoints_v2/converter/config.json +57 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/commons.py +160 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/mel_processing.py +183 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/models.py +499 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/modules.py +598 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/openvoice_app.py +275 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/se_extractor.py +153 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/transforms.py +209 -0
- xinference/thirdparty/indextts/s2mel/modules/openvoice/utils.py +194 -0
- xinference/thirdparty/indextts/s2mel/modules/quantize.py +229 -0
- xinference/thirdparty/indextts/s2mel/modules/rmvpe.py +631 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/__init__.py +4 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/heads.py +164 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/helpers.py +71 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/loss.py +114 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/models.py +118 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/modules.py +213 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/pretrained.py +51 -0
- xinference/thirdparty/indextts/s2mel/modules/vocos/spectral_ops.py +192 -0
- xinference/thirdparty/indextts/s2mel/modules/wavenet.py +174 -0
- xinference/thirdparty/indextts/s2mel/optimizers.py +96 -0
- xinference/thirdparty/indextts/s2mel/wav2vecbert_extract.py +148 -0
- xinference/thirdparty/indextts/utils/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/arch_util.py +120 -0
- xinference/thirdparty/indextts/utils/checkpoint.py +34 -0
- xinference/thirdparty/indextts/utils/common.py +121 -0
- xinference/thirdparty/indextts/utils/feature_extractors.py +50 -0
- xinference/thirdparty/indextts/utils/front.py +536 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/codec.py +427 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/__init__.py +11 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/factorized_vector_quantize.py +150 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/lookup_free_quantize.py +77 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/residual_vq.py +177 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/quantize/vector_quantize.py +401 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/amphion_codec/vocos.py +881 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_dataset.py +264 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_inference.py +515 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_sampler.py +126 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/codec_trainer.py +166 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/__init__.py +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_dataset.py +98 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_inference.py +137 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/facodec_trainer.py +776 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/__init__.py +1 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/bst.t7 +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/JDC/model.py +219 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/attentions.py +437 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/commons.py +331 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/layers.py +460 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/quantize.py +741 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/style_encoder.py +110 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/modules/wavenet.py +224 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/facodec/optimizer.py +104 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/repcodec_model.py +210 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/kmeans/vocos.py +850 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/melvqgan/melspec.py +108 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/README.md +216 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/__init__.py +6 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/__init__.py +5 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/act.py +29 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/filter.py +96 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/alias_free_torch/resample.py +57 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/facodec.py +1222 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/gradient_reversal.py +35 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/melspec.py +102 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/__init__.py +7 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/fvq.py +116 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/quantize/rvq.py +87 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/ns3_codec/transformer.py +234 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/model.py +184 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/__init__.py +27 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/conv.py +346 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/lstm.py +46 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/norm.py +37 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/__init__.py +14 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/ac.py +317 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/core_vq.py +388 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/distrib.py +135 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/quantization/vq.py +125 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/speechtokenizer/modules/seanet.py +414 -0
- xinference/thirdparty/indextts/utils/maskgct/models/codec/vevo/vevo_repcodec.py +592 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/ckpt/wav2vec2bert_stats.pt +0 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/llama_nar.py +650 -0
- xinference/thirdparty/indextts/utils/maskgct/models/tts/maskgct/maskgct_s2a.py +503 -0
- xinference/thirdparty/indextts/utils/maskgct_utils.py +259 -0
- xinference/thirdparty/indextts/utils/text_utils.py +41 -0
- xinference/thirdparty/indextts/utils/typical_sampling.py +30 -0
- xinference/thirdparty/indextts/utils/utils.py +93 -0
- xinference/thirdparty/indextts/utils/webui_utils.py +42 -0
- xinference/thirdparty/indextts/utils/xtransformers.py +1247 -0
- xinference/thirdparty/indextts/vqvae/__init__.py +0 -0
- xinference/thirdparty/indextts/vqvae/xtts_dvae.py +395 -0
- xinference/thirdparty/melo/text/chinese_mix.py +2 -2
- xinference/types.py +9 -0
- xinference/ui/gradio/media_interface.py +66 -8
- xinference/ui/web/ui/build/asset-manifest.json +6 -6
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/css/main.5ea97072.css +2 -0
- xinference/ui/web/ui/build/static/css/main.5ea97072.css.map +1 -0
- xinference/ui/web/ui/build/static/js/main.45e78536.js +3 -0
- xinference/ui/web/ui/build/static/js/{main.1086c759.js.LICENSE.txt → main.45e78536.js.LICENSE.txt} +0 -7
- xinference/ui/web/ui/build/static/js/main.45e78536.js.map +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/089c38df5f52348d212ed868dda5c518a42e0c2762caed4175487c0405830c35.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2b6e3a5b6eb2c5c5f2d007e68cd46c372721cd52bf63508adcdb21ecf79241d8.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/2d887825fd07a56f872eda4420da25fba0b5b62a23bdcc6c6da1a5281887f618.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4001f9c3e64e73a4f2158826650c174a59d5e3f89ddecddf17cbb6bb688cc4ca.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4a7018a69e6b7f90fc313248c2aa86f2a8f1eb1db120df586047a8023549b44b.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/64b12aaa1c1d1bf53820ada8a63769067c0ccc5aab46b32348eb1917ae7f2a11.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a68b6ee3b31eadc051fb95ce8f8ccb9c2e8b52c60f290dbab545a1917e065282.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ae8771cc37693feb160fa8727231312a0c54ef2d1d1ca893be568cd70016ca7e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/be6aada1ee4adc2bbf65dbe56d17db32bb3b5478be05d6b527805a8ba6cfb2b9.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/de91c352653c233cf0cb6674e6e04049a44fd0e1156560de65d5c4620521391e.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e85f7002fc325c83b9c9cd8a1619e5b3ebc701d30e811afc284b88e6ae710cb5.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/e8b603c78944bf3d213639078bfe155ff5c0dfa4048a93cbb967cad6a4eb4ff3.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f05535160a508b2a312de546a6de234776c613db276479ea4253c0b1bdeeb7d6.json +1 -0
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f09ba9e11106bd59a0de10cc85c55084097729dcab575f43dfcf07375961ed87.json +1 -0
- xinference/ui/web/ui/node_modules/.package-lock.json +0 -33
- xinference/ui/web/ui/package-lock.json +0 -34
- xinference/ui/web/ui/package.json +0 -1
- xinference/ui/web/ui/src/locales/en.json +9 -3
- xinference/ui/web/ui/src/locales/ja.json +9 -3
- xinference/ui/web/ui/src/locales/ko.json +9 -3
- xinference/ui/web/ui/src/locales/zh.json +9 -3
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/METADATA +24 -6
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/RECORD +296 -77
- xinference/ui/web/ui/build/static/css/main.013f296b.css +0 -2
- xinference/ui/web/ui/build/static/css/main.013f296b.css.map +0 -1
- xinference/ui/web/ui/build/static/js/main.1086c759.js +0 -3
- xinference/ui/web/ui/build/static/js/main.1086c759.js.map +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/0b0f77000cc1b482ca091cfbcae511dfe02f08916971645fad21d0b1234d04a2.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/1c5f8ff423a7c9202bea60b15680f04b1e9964b445b0da3f86c6ff70cf24e797.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/44ce7993e344980e3ed4f13e8f69237d4a5dfc60e37ca6b54f51f8ee1357bd67.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/4aec1cc414ac3ebb3481d3d915e4db597d9127de813291346eacb8554ab170d4.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/644cfec52f3c57a6e222ce60f112237a1efefe9835efd9aad857a685f53d8eed.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/663436f72af53fe0d72394f56d003fa4e0bba489e5bb4e483fd34b00f84637f7.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/69db82ca9bfe27fe417cc6cf2b1716b09be9c6f0cd198530f12bfc60e801bbcf.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/85087e27618d740c236bf159f30e0219db443ab55f0997388eed5fde6f9e90cc.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/88b07838348864aa86c672be3bbca1e9f58f6f3a2881b32070ec27f4e7b449d1.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/8b8cd408ccfbe115acef27ccfa5b233da8597131a2a5712add13e1e4d5d4504b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a23824fe746b9c6ca5eee9159b5764d1ff1653c1d856288c0f75c742bbb0023b.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bc1aacc65a102db325ca61bcd2f681e1ae22c36a1f1d98a6ff5e4ad49dc7544f.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/c682fd521747c19dae437d83ce3235a306ce6b68e24a117bc57c27ebb8d1f1ca.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +0 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +0 -1
- xinference/ui/web/ui/node_modules/clipboard/.babelrc.json +0 -11
- xinference/ui/web/ui/node_modules/clipboard/.eslintrc.json +0 -24
- xinference/ui/web/ui/node_modules/clipboard/.prettierrc.json +0 -9
- xinference/ui/web/ui/node_modules/clipboard/bower.json +0 -18
- xinference/ui/web/ui/node_modules/clipboard/composer.json +0 -25
- xinference/ui/web/ui/node_modules/clipboard/package.json +0 -63
- xinference/ui/web/ui/node_modules/delegate/package.json +0 -31
- xinference/ui/web/ui/node_modules/good-listener/bower.json +0 -11
- xinference/ui/web/ui/node_modules/good-listener/package.json +0 -35
- xinference/ui/web/ui/node_modules/select/bower.json +0 -13
- xinference/ui/web/ui/node_modules/select/package.json +0 -29
- xinference/ui/web/ui/node_modules/tiny-emitter/package.json +0 -53
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/WHEEL +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.10.0.dist-info → xinference-1.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/vllm/core.py CHANGED

```diff
@@ -131,7 +131,7 @@ except ImportError:
     VLLM_INSTALLED = False
     VLLM_VERSION = None
 
-VLLM_SUPPORTED_VISION_MODEL_LIST: List[str] = []
+VLLM_SUPPORTED_MULTI_MODEL_LIST: List[str] = []
 VLLM_SUPPORTED_MODELS = [
     "llama-2",
     "llama-3",
```
```diff
@@ -229,34 +229,37 @@ if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.5.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.1"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL3")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("internvl2")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL2.5-MPO")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("InternVL3")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.6.3"):
     VLLM_SUPPORTED_MODELS.append("llama-3.2-vision")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("llama-3.2-vision-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("llama-3.2-vision-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("QvQ-72B-Preview")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.2"):
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-vl-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2-audio-instruct")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.7.3"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwenLong-l1")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("qwen2.5-omni")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-3-1b-it")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("gemma-3-it")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("gemma-3-it")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
```
```diff
@@ -264,12 +267,15 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.4"):
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.5"):
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
 
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.0"):
+    VLLM_SUPPORTED_CHAT_MODELS.append("Baichuan-M2")
+
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.1"):
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm4")
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("Ernie4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.1v-thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.1v-thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Thinking")
     VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Coder")
```
```diff
@@ -277,15 +283,22 @@ if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.9.2"):
 
 if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("glm-4.5")
-    VLLM_SUPPORTED_VISION_MODEL_LIST.append("glm-4.5v")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("glm-4.5v")
     VLLM_SUPPORTED_CHAT_MODELS.append("KAT-V1")
 
 if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.0"):
     VLLM_SUPPORTED_CHAT_MODELS.append("gpt-oss")
-    VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
 
-if VLLM_INSTALLED and VLLM_VERSION
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
     VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
+
+if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.11.0"):
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-VL-Instruct")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Thinking")
+    VLLM_SUPPORTED_MULTI_MODEL_LIST.append("Qwen3-Omni-Instruct")
 
 
 class VLLMModel(LLM):
```
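The block above gates each model family on the installed vLLM version via `packaging.version`. For orientation, a minimal self-contained sketch of the same pattern; `SUPPORTED_SINCE` and `is_supported` are illustrative names (not xinference API), with thresholds taken from the diff:

```python
# Sketch of the version-gating idea used above, under assumed names.
from typing import Dict
from packaging import version

SUPPORTED_SINCE: Dict[str, str] = {
    "qwen3": "0.8.5",            # per the >= 0.8.5 block
    "seed-oss": "0.10.2",        # per the >= 0.10.2 block
    "Qwen3-VL-Instruct": "0.11.0",  # per the >= 0.11.0 block
}

def is_supported(model_name: str, installed_vllm: str) -> bool:
    """True if the installed vLLM is new enough for the given model family."""
    required = SUPPORTED_SINCE.get(model_name)
    return required is not None and version.parse(installed_vllm) >= version.parse(required)

assert is_supported("Qwen3-VL-Instruct", "0.11.0")
assert not is_supported("Qwen3-VL-Instruct", "0.10.2")
```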
```diff
@@ -537,7 +550,7 @@ class VLLMModel(LLM):
             # patch vllm Executor.get_class
             Executor.get_class = lambda vllm_config: executor_cls
             self._engine = AsyncLLMEngine.from_engine_args(engine_args)
-        except:
+        except:  # noqa: E722
             logger.exception("Creating vllm engine failed")
             self._loading_error = sys.exc_info()
 
```
```diff
@@ -706,7 +719,7 @@ class VLLMModel(LLM):
         logger.info("Detecting vLLM is not health, prepare to quit the process")
         try:
             self.stop()
-        except:
+        except:  # noqa: E722
             # ignore error when stop
             pass
         # Just kill the process and let xinference auto-recover the model
```
```diff
@@ -849,7 +862,7 @@ class VLLMModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
             # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
```
```diff
@@ -934,9 +947,21 @@ class VLLMModel(LLM):
 
     async def _get_tokenizer(self, lora_request: Any) -> Any:
         try:
-
+            # vLLM 0.11.0+ get_tokenizer doesn't accept lora_request parameter
+            if (
+                VLLM_VERSION >= version.parse("0.11.0")
+                or VLLM_VERSION.base_version >= "0.11.0"
+            ):
+                return await self._engine.get_tokenizer()  # type: ignore
+            else:
+                return await self._engine.get_tokenizer(lora_request)  # type: ignore
         except AttributeError:
-
+            # Fallback to get_tokenizer_async for older versions
+            try:
+                return await self._engine.get_tokenizer_async(lora_request)  # type: ignore
+            except (AttributeError, TypeError):
+                # If all else fails, try without parameters
+                return await self._engine.get_tokenizer()  # type: ignore
 
     def _tokenize(self, tokenizer: Any, prompt: str, config: dict) -> List[int]:
         truncate_prompt_tokens = config.get("truncate_prompt_tokens")
```
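The hunk above layers version checks over `AttributeError`/`TypeError` fallbacks. An alternative way to express the same compatibility idea is to inspect the callable's signature instead of comparing version strings; a sketch under assumed names (`engine` is a hypothetical object exposing either the old one-argument or the new zero-argument `get_tokenizer`):

```python
# Feature-detection sketch: dispatch on the method's arity rather than version.
import inspect

async def get_tokenizer_compat(engine, lora_request=None):
    sig = inspect.signature(engine.get_tokenizer)
    if len(sig.parameters) == 0:
        # Newer API: no lora_request parameter.
        return await engine.get_tokenizer()
    # Older API: tokenizer may vary per LoRA adapter.
    return await engine.get_tokenizer(lora_request)
```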
```diff
@@ -968,7 +993,10 @@ class VLLMModel(LLM):
             from vllm import TokensPrompt
 
             token_ids = await asyncio.to_thread(
-                self._tokenize,
+                self._tokenize,
+                tokenizer,
+                prompt,  # type: ignore
+                config,
             )
             return TokensPrompt(prompt_token_ids=token_ids)
 
```
```diff
@@ -1017,23 +1045,90 @@ class VLLMModel(LLM):
             # guided decoding only available for vllm >= 0.6.3
             from vllm.sampling_params import GuidedDecodingParams
 
-
-
-
-
-
-
-
-
-
-
+            # Extract guided decoding parameters
+            guided_params: dict[str, Any] = {}
+            guided_json = sanitized_generate_config.pop("guided_json", None)
+            if guided_json:
+                guided_params["json"] = guided_json
+
+            guided_regex = sanitized_generate_config.pop("guided_regex", None)
+            if guided_regex:
+                guided_params["regex"] = guided_regex
+
+            guided_choice = sanitized_generate_config.pop("guided_choice", None)
+            if guided_choice:
+                guided_params["choice"] = guided_choice
+
+            guided_grammar = sanitized_generate_config.pop("guided_grammar", None)
+            if guided_grammar:
+                guided_params["grammar"] = guided_grammar
+
+            guided_json_object = sanitized_generate_config.pop(
+                "guided_json_object", None
+            )
+            if guided_json_object:
+                guided_params["json_object"] = guided_json_object
+
+            guided_backend = sanitized_generate_config.pop(
+                "guided_decoding_backend", None
             )
+            if guided_backend:
+                guided_params["_backend"] = guided_backend
 
-
-
+            guided_whitespace_pattern = sanitized_generate_config.pop(
+                "guided_whitespace_pattern", None
             )
+            if guided_whitespace_pattern:
+                guided_params["whitespace_pattern"] = guided_whitespace_pattern
+
+            # Create GuidedDecodingParams if we have any guided parameters
+            guided_options = None
+            if guided_params:
+                try:
+                    guided_options = GuidedDecodingParams(**guided_params)
+                except Exception as e:
+                    logger.warning(f"Failed to create GuidedDecodingParams: {e}")
+                    guided_options = None
+
+            try:
+                import inspect
+
+                sp_sig = inspect.signature(SamplingParams)
+                # For v0.9.2 and similar versions, prioritize guided_decoding over structured_outputs
+                # structured_outputs was introduced later (around v0.11.0) and may not accept
+                # GuidedDecodingParams in earlier versions even if the parameter exists
+                if "guided_decoding" in sp_sig.parameters:
+                    sampling_params = SamplingParams(
+                        guided_decoding=guided_options, **sanitized_generate_config
+                    )
+                elif "structured_outputs" in sp_sig.parameters:
+                    try:
+                        sampling_params = SamplingParams(
+                            structured_outputs=guided_options,
+                            **sanitized_generate_config,
+                        )
+                    except TypeError as e:
+                        if "structured_outputs" in str(e):
+                            # structured_outputs parameter exists but doesn't accept GuidedDecodingParams
+                            # Fall back to no guided decoding
+                            logger.warning(
+                                f"structured_outputs parameter failed: {e}. "
+                                "Falling back to no guided decoding for vLLM version compatibility."
+                            )
+                            sampling_params = SamplingParams(
+                                **sanitized_generate_config
+                            )
+                        else:
+                            raise
+                else:
+                    sampling_params = SamplingParams(**sanitized_generate_config)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to create SamplingParams with guided decoding: {e}"
+                )
+                sampling_params = SamplingParams(**sanitized_generate_config)
         else:
-            # ignore generate configs
+            # ignore generate configs for older versions
             sanitized_generate_config.pop("guided_json", None)
             sanitized_generate_config.pop("guided_regex", None)
             sanitized_generate_config.pop("guided_choice", None)
```
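The essence of the hunk above is mapping OpenAI-style `guided_*` request keys onto vLLM's `GuidedDecodingParams`. A standalone sketch of that mapping, assuming vLLM >= 0.6.3 (where `GuidedDecodingParams` and the `guided_decoding` parameter of `SamplingParams` exist); the `request` dict is illustrative:

```python
# Sketch: translate request-level guided_* keys into GuidedDecodingParams.
from vllm.sampling_params import GuidedDecodingParams, SamplingParams

request = {
    "max_tokens": 128,
    "guided_choice": ["positive", "negative", "neutral"],
}

# Pop guided_* keys so the leftovers can be passed straight to SamplingParams.
key_map = {"guided_json": "json", "guided_regex": "regex",
           "guided_choice": "choice", "guided_grammar": "grammar"}
guided_kwargs = {dst: request.pop(src)
                 for src, dst in key_map.items() if src in request}

guided = GuidedDecodingParams(**guided_kwargs) if guided_kwargs else None
params = SamplingParams(guided_decoding=guided, **request)
```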
```diff
@@ -1049,7 +1144,9 @@ class VLLMModel(LLM):
         # this requires tokenizing
         tokenizer = await self._get_tokenizer(lora_request)
         prompt_or_token_ids = await self._gen_tokens_prompt(
-            tokenizer,
+            tokenizer,
+            prompt,
+            sanitized_generate_config,  # type: ignore
         )
         sampling_params.max_tokens = max_tokens = self._context_length - len(  # type: ignore
             prompt_or_token_ids["prompt_token_ids"]  # type: ignore
```
```diff
@@ -1204,11 +1301,10 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         ]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
```
```diff
@@ -1236,6 +1332,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
     ) -> Dict:
         if not generate_config:
             generate_config = {}
+
         if "reasoning" in getattr(self.model_family, "model_ability", []):
             generate_config.pop("stop", None)
             generate_config.pop("stop_token_ids", None)
```
```diff
@@ -1249,6 +1346,19 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             generate_config["stop_token_ids"] = (
                 self.model_family.stop_token_ids.copy()
             )
+
+        # if response_format exists,generate guided_json
+        if "response_format" in generate_config:
+            resp_format = generate_config["response_format"]
+            if (
+                isinstance(resp_format, dict)
+                and resp_format.get("type") == "json_schema"
+                and "json_schema" in resp_format
+            ):
+                schema = resp_format["json_schema"].get("schema_")
+                if schema:
+                    generate_config["guided_json"] = schema
+
         return generate_config
 
     @staticmethod
```
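The added block converts an OpenAI-style `response_format` of type `json_schema` into vLLM guided decoding. A sketch of the request shape it handles; the schema contents here are illustrative, while the `schema_` key name comes directly from the diff:

```python
# Sketch: the response_format -> guided_json conversion performed above.
generate_config = {
    "response_format": {
        "type": "json_schema",
        "json_schema": {
            "name": "weather",
            "schema_": {
                "type": "object",
                "properties": {"city": {"type": "string"},
                               "temp_c": {"type": "number"}},
                "required": ["city", "temp_c"],
            },
        },
    }
}

resp_format = generate_config["response_format"]
if resp_format.get("type") == "json_schema" and "json_schema" in resp_format:
    schema = resp_format["json_schema"].get("schema_")
    if schema:
        generate_config["guided_json"] = schema

assert generate_config["guided_json"]["required"] == ["city", "temp_c"]
```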
```diff
@@ -1354,7 +1464,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         return self._to_chat_completion(c, self.reasoning_parser)
 
 
-class VLLMVisionModel(VLLMModel, ChatModelMixin):
+class VLLMMultiModel(VLLMModel, ChatModelMixin):
     @classmethod
     def match_json(
         cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
```
```diff
@@ -1366,11 +1476,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
             return False
         if llm_spec.model_format == "pytorch":
-            if quantization != "none" and
+            if quantization != "none" and quantization is not None:
                 return False
         if llm_spec.model_format == "awq":
-
-            if "4" not in quantization:
+            if not any(q in quantization for q in ("4", "8")):
                 return False
         if llm_spec.model_format == "gptq":
             if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
```
```diff
@@ -1380,12 +1489,16 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if "4" not in quantization:
                 return False
         if isinstance(llm_family, CustomLLMFamilyV2):
-            if llm_family.model_family not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
         else:
-            if llm_family.model_name not in VLLM_SUPPORTED_VISION_MODEL_LIST:
+            if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
                 return False
-        if "vision" not in llm_family.model_ability:
+        if (
+            "vision" not in llm_family.model_ability
+            and "audio" not in llm_family.model_ability
+            and "omni" not in llm_family.model_ability
+        ):
             return False
         return VLLM_INSTALLED
```
```diff
@@ -1394,13 +1507,21 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
     ) -> VLLMModelConfig:
         model_config = super()._sanitize_model_config(model_config)
         if VLLM_VERSION >= version.parse("0.5.5"):
-            model_config
-
-
-
-
-
-
+            if model_config.get("limit_mm_per_prompt"):
+                model_config["limit_mm_per_prompt"] = json.loads(
+                    model_config.get("limit_mm_per_prompt")  # type: ignore
+                )
+            else:
+                if "omni" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {
+                        "image": 2,
+                        "video": 2,
+                        "audio": 2,
+                    }
+                elif "vision" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
+                elif "audio" in self.model_family.model_ability:
+                    model_config["limit_mm_per_prompt"] = {"audio": 2}
         return model_config
 
     def _sanitize_chat_config(
```
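The hunk above parses a user-supplied `limit_mm_per_prompt` JSON string and otherwise picks a per-ability default. A standalone sketch of that behavior; `model_ability` stands in for `self.model_family.model_ability`:

```python
# Sketch: resolve limit_mm_per_prompt from user config or ability defaults.
import json

def resolve_limit(model_config: dict, model_ability: list) -> dict:
    if model_config.get("limit_mm_per_prompt"):
        # User value arrives as a JSON string, e.g. '{"image": 4}'.
        model_config["limit_mm_per_prompt"] = json.loads(
            model_config["limit_mm_per_prompt"]
        )
    elif "omni" in model_ability:
        model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2, "audio": 2}
    elif "vision" in model_ability:
        model_config["limit_mm_per_prompt"] = {"image": 2, "video": 2}
    elif "audio" in model_ability:
        model_config["limit_mm_per_prompt"] = {"audio": 2}
    return model_config

print(resolve_limit({"limit_mm_per_prompt": '{"image": 4}'}, ["vision"]))
# {'limit_mm_per_prompt': {'image': 4}}
```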
```diff
@@ -1434,7 +1555,10 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             multi_modal_data = prompt.get("multi_modal_data")
 
             token_ids = await asyncio.to_thread(
-                self._tokenize,
+                self._tokenize,
+                tokenizer,
+                prompt_str,
+                config,  # type: ignore
             )
             return TokensPrompt(
                 prompt_token_ids=token_ids, multi_modal_data=multi_modal_data
```
```diff
@@ -1450,9 +1574,13 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
 
         model_family = self.model_family.model_family or self.model_family.model_name
-
+        audios, images, videos = None, None, None
         if "internvl" not in model_family.lower():
-            from
+            from qwen_omni_utils import (
+                process_audio_info,
+                process_mm_info,
+                process_vision_info,
+            )
 
             messages = self._transform_messages(messages)
 
```
```diff
@@ -1467,29 +1595,36 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
             if tools and model_family in QWEN_TOOL_CALL_FAMILY:
                 full_context_kwargs["tools"] = tools
             assert self.model_family.chat_template is not None
+            if "omni" in self.model_family.model_ability:
+                audios, images, videos = process_mm_info(
+                    messages, use_audio_in_video=True
+                )
+            elif "audio" in self.model_family.model_ability:
+                audios = process_audio_info(messages, use_audio_in_video=False)
+            elif "vision" in self.model_family.model_ability:
+                images, videos = process_vision_info(  # type: ignore
+                    messages, return_video_kwargs=False
+                )
+
             prompt = self.get_full_context(
                 messages, self.model_family.chat_template, **full_context_kwargs
             )
-            images, video_inputs = process_vision_info(messages)
-            if video_inputs:
-                raise ValueError("Not support video input now.")
-        else:
-            prompt, images = self.get_specific_prompt(model_family, messages)
 
-        if not images:
-            inputs = {
-                "prompt": prompt,
-            }
-        elif len(images) == 1:
-            inputs = {
-                "prompt": prompt,
-                "multi_modal_data": {"image": images[-1]},  # type: ignore
-            }
         else:
-
-
-
-
+            prompt, images = self.get_specific_prompt(model_family, messages)
+        inputs = {"prompt": prompt, "multi_modal_data": {}, "mm_processor_kwargs": {}}
+        if images:
+            inputs["multi_modal_data"]["image"] = images
+        if videos:
+            inputs["multi_modal_data"]["video"] = videos
+        if audios:
+            inputs["multi_modal_data"]["audio"] = audios
+        if "omni" in self.model_family.model_ability:
+            inputs["mm_processor_kwargs"]["use_audio_in_video"] = True
+        if inputs["multi_modal_data"] == {}:
+            inputs.pop("multi_modal_data")
+        if inputs["mm_processor_kwargs"] == {}:
+            inputs.pop("mm_processor_kwargs")
         generate_config = self._sanitize_chat_config(generate_config)
 
         stream = generate_config.get("stream", None)
```
xinference/model/utils.py CHANGED

```diff
@@ -315,6 +315,11 @@ def set_all_random_seed(seed: int):
 
 
 class CancellableDownloader:
+    _global_lock = threading.Lock()
+    _active_instances = 0
+    _original_update = None  # Class-level original update method
+    _patch_lock = threading.Lock()  # Additional lock for patching operations
+
     def __init__(
         self,
         cancel_error_cls: Type[BaseException] = asyncio.CancelledError,
```
```diff
@@ -325,23 +330,23 @@ class CancellableDownloader:
         self._cancelled = threading.Event()
         self._done_event = threading.Event()
         self._cancel_error_cls = cancel_error_cls
-        self._original_update = None
         # progress for tqdm that is main
         self._main_progresses: Set[tqdm] = set()
         # progress for file downloader
         # mainly when tqdm unit is set
         self._download_progresses: Set[tqdm] = set()
-        # tqdm
-        self.
+        # Instance-specific tqdm tracking
+        self._patched_instances: Set[int] = set()
 
     def reset(self):
         self._main_progresses.clear()
         self._download_progresses.clear()
 
     def get_progress(self) -> float:
-        if self.
-            # directly return 1.0 when
+        if self.done:
+            # directly return 1.0 when finished
             return 1.0
+        # Don't return 1.0 when cancelled, calculate actual progress
 
         tasks = finished_tasks = 0
         for main_progress in self._main_progresses:
```
```diff
@@ -376,6 +381,7 @@ class CancellableDownloader:
 
     def cancel(self):
         self._cancelled.set()
+        self._done_event.set()
 
     @property
     def cancelled(self):
```
```diff
@@ -392,39 +398,76 @@ class CancellableDownloader:
             raise self._cancel_error_cls(error_msg)
 
     def patch_tqdm(self):
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Use class-level patching to avoid conflicts
+        with self._patch_lock:
+            if self._original_update is None:
+                self._original_update = original_update = tqdm.update
+
+                # Thread-safe patched update
+                def patched_update(tqdm_instance, n):
+                    import gc
+
+                    # Get all CancellableDownloader instances and check for cancellation
+                    downloaders = [
+                        obj
+                        for obj in gc.get_objects()
+                        if isinstance(obj, CancellableDownloader)
+                    ]
+
+                    for downloader in downloaders:
+                        # if download cancelled, throw error
+                        if getattr(downloader, "cancelled", False):
+                            downloader.raise_error()
+
+                        progresses = None
+                        if not getattr(tqdm_instance, "disable", False):
+                            unit = getattr(tqdm_instance, "unit", "it")
+                            if unit == "it":
+                                progresses = getattr(
+                                    downloader, "_main_progresses", None
+                                )
+                            else:
+                                progresses = getattr(
+                                    downloader, "_download_progresses", None
+                                )
+
+                        if progresses is not None:
+                            progresses.add(tqdm_instance)
+                        else:
+                            logger.debug(
+                                f"No progresses found for downloader {downloader}"
+                            )
+
+                    # Call original update with safety check
+                    return original_update(tqdm_instance, n)
+
+                tqdm.update = patched_update
 
     def unpatch_tqdm(self):
-
-
-
-
+        with self._patch_lock:
+            if self._original_update is not None and self._active_instances == 0:
+                tqdm.update = self._original_update
+                self._original_update = None
 
     def __enter__(self):
-
+        # Use global lock to prevent concurrent patching
+        with self._global_lock:
+            if self._active_instances == 0:
+                self.patch_tqdm()
+            self._active_instances += 1
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-
-        self.
-
+        # Use global lock to prevent concurrent unpatching
+        with self._global_lock:
+            self._active_instances -= 1
+            if self._active_instances == 0:
+                self.unpatch_tqdm()
+            try:
+                self._done_event.set()
+                self.reset()
+            except Exception as e:
+                logger.debug(f"Error during CancellableDownloader cleanup: {e}")
 
 
 def get_engine_params_by_name(
```