PyPI - xinference - Versions diffs - 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl - Mend

xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (334) hide show

xinference/model/audio/kokoro_zh.py ADDED Viewed

@@ -0,0 +1,124 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+import numpy as np
+from ...device_utils import get_available_device, is_device_available
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV2
+logger = logging.getLogger(__name__)
+REPO_ID = "hexgrad/Kokoro-82M-v1.1-zh"
+class KokoroZHModel:
+    """Text-to-speech wrapper for the Kokoro v1.1 Chinese model (``REPO_ID``).
+    Construction only records the configuration; ``load()`` builds the
+    kokoro pipelines and ``speech()`` renders text to encoded audio bytes.
+    """
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV2",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        """Store the launch configuration; nothing is loaded here.
+        Args:
+            model_uid: Identifier assigned to this model instance.
+            model_path: Directory from which ``load()`` reads ``config.json``
+                and ``kokoro-v1_1-zh.pth``.
+            model_spec: Model family description, also exposed as
+                ``model_family``.
+            device: Target device; when ``None`` one is chosen in ``load()``.
+            **kwargs: Extra launch options; only ``lang_code`` is read by
+                this class (in ``load()``).
+        """
+        self.model_family = model_spec
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+        self._en_pipeline = None
+    def _en_callable(self, text):
+        """Return phonemes for an English word met by the Chinese pipeline.
+        Fixes the issue of English words being skipped in the Chinese model.
+        from https://hf-mirror.com/hexgrad/Kokoro-82M-v1.1-zh/blob/main/samples/make_zh.py
+        "Kokoro" and "Sol" map to fixed phoneme strings; any other text is
+        sent through the English helper pipeline created in ``load()`` and
+        the phonemes of its first result are returned.
+        """
+        if text == "Kokoro":
+            return "kˈOkəɹO"
+        elif text == "Sol":
+            return "sˈOl"
+        return next(self._en_pipeline(text)).phonemes
+    @property
+    def model_ability(self):
+        """Abilities declared by the model spec."""
+        return self._model_spec.model_ability
+    def load(self):
+        """Resolve the device and build the English helper and main pipelines.
+        Raises:
+            ValueError: If a device was requested explicitly but
+                ``is_device_available`` reports it as unavailable.
+        """
+        if self._device is None:
+            self._device = get_available_device()
+        elif not is_device_available(self._device):
+            raise ValueError(f"Device {self._device} is not available!")
+        import os
+        from kokoro import KModel, KPipeline
+        # Helper pipeline created with model=False; only the phonemes of its
+        # results are used (see _en_callable).
+        self._en_pipeline = KPipeline(lang_code="a", repo_id=REPO_ID, model=False)
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_1-zh.pth")
+        # The language code can be overridden at launch time; defaults to "z".
+        lang_code = self._kwargs.get("lang_code", "z")
+        logger.info("Launching Kokoro model with language code: %s", lang_code)
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path).to(self._device),
+            repo_id=REPO_ID,
+            en_callable=self._en_callable,
+            device=self._device,
+        )
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        """Synthesize ``input`` and return the encoded audio as bytes.
+        Args:
+            input: Text to synthesize.
+            voice: Voice name, or a value ending in ``.pt`` for a custom
+                voice; a falsy value selects ``"zf_001"``.
+            response_format: Container format, upper-cased and handed to
+                ``soundfile`` (e.g. ``"mp3"``).
+            speed: Forwarded unchanged to the pipeline call.
+            stream: Must be falsy; streaming is not supported.
+            **kwargs: Forwarded unchanged to the pipeline call.
+        Returns:
+            The encoded audio (24000 Hz, one channel) as ``bytes``.
+        Raises:
+            Exception: If ``stream`` is truthy.
+            RuntimeError: If ``load()`` has not been called yet.
+            ValueError: If the pipeline produced no audio segments.
+        """
+        import soundfile
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        # Explicit check instead of ``assert`` so it survives ``python -O``.
+        if self._model is None:
+            raise RuntimeError("Kokoro model is not loaded; call load() first.")
+        if not voice:
+            voice = "zf_001"
+            logger.info("Auto select speaker: %s", voice)
+        elif voice.endswith(".pt"):
+            logger.info("Using custom voice pt: %s", voice)
+        else:
+            logger.info("Using voice: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        # Index 2 of every pipeline result is the audio segment.
+        segments = [r[2] for r in generator]
+        if not segments:
+            # np.concatenate([]) would fail with an unhelpful message.
+            raise ValueError("Kokoro produced no audio for the given input.")
+        audio = np.concatenate(segments)
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                samplerate=24000,
+                channels=1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            # Read the buffer only after SoundFile is closed (data flushed)
+            # and before the BytesIO itself is closed.
+            return out.getvalue()

xinference/model/audio/model_spec.json CHANGED Viewed

@@ -685,7 +685,7 @@
         "model_revision": "4dcc16f297f2ff98a17b3726b16f5de5a5e45672"
       },
       "modelscope": {
-        "model_id": "SWivid/F5-TTS_Emilia-ZH-EN",
+        "model_id": "AI-ModelScope/F5-TTS",
         "model_revision": "master"
       }
     }
@@ -862,6 +862,26 @@
         "model_revision": "master"
       }
     }
+  },
+    {
+    "version": 2,
+    "model_name": "Kokoro-82M-v1.1-zh",
+    "model_family": "Kokoro-zh",
+    "model_ability": [
+      "text2audio",
+      "text2audio_zero_shot"
+    ],
+    "multilingual": false,
+    "model_src": {
+      "huggingface": {
+        "model_id": "hexgrad/Kokoro-82M-v1.1-zh",
+        "model_revision": "01e7505bd6a7a2ac4975463114c3a7650a9f7218"
+      },
+      "modelscope": {
+        "model_id": "AI-ModelScope/Kokoro-82M-v1.1-zh",
+        "model_revision": "master"
+      }
+    }
   },
   {
     "version": 2,
@@ -911,5 +931,42 @@
         "model_revision": "master"
       }
     }
+  },
+  {
+    "version": 2,
+    "model_name": "IndexTTS2",
+    "model_family": "IndexTTS2",
+    "model_ability": [
+      "text2audio",
+      "text2audio_zero_shot",
+      "text2audio_voice_cloning",
+      "text2audio_emotion_control"
+    ],
+    "multilingual": true,
+    "virtualenv": {
+      "packages": [
+        "transformers==4.52.1",
+        "#system_torch#",
+        "#system_numpy#",
+        "json5",
+        "munch",
+        "matplotlib",
+        "flatten_dict",
+        "julius",
+        "tensorboard",
+        "randomname",
+        "argbind"
+      ]
+    },
+    "model_src": {
+      "huggingface": {
+        "model_id": "IndexTeam/IndexTTS-2",
+        "model_revision": "main"
+      },
+      "modelscope": {
+        "model_id": "IndexTeam/IndexTTS-2",
+        "model_revision": "master"
+      }
+    }
   }
 ]

xinference/model/embedding/sentence_transformers/core.py CHANGED Viewed

@@ -265,10 +265,10 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
                     "clip" in self.model_family.model_name.lower()
                     or "jina-embeddings-v4" in self.model_family.model_name.lower()
                 ):
-                    if "input_ids" in features and hasattr(
-                        features["input_ids"], "numel"
-                    ):
-                        all_token_nums += features["input_ids"].numel()
+                    # support input_ids and text_input_ids
+                    for key in ["input_ids", "text_input_ids"]:
+                        if key in features and hasattr(features[key], "numel"):
+                            all_token_nums += features[key].numel()
                     if "pixel_values" in features and hasattr(
                         features["pixel_values"], "numel"
                     ):

xinference/model/embedding/vllm/core.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import importlib.util
+import json
 import logging
 from typing import List, Union
@@ -54,13 +55,18 @@ class VLLMEmbeddingModel(EmbeddingModel):
                 self._kwargs["hf_overrides"].update(
                     is_matryoshka=True,
                 )
+            elif isinstance(self._kwargs["hf_overrides"], str):
+                self._kwargs["hf_overrides"] = json.loads(self._kwargs["hf_overrides"])
+                self._kwargs["hf_overrides"].update(
+                    is_matryoshka=True,
+                )
         self._model = LLM(model=self._model_path, task="embed", **self._kwargs)
         self._tokenizer = self._model.get_tokenizer()
     @staticmethod
     def _get_detailed_instruct(task_description: str, query: str) -> str:
-        return f"Instruct: {task_description}\nQuery:{query}"
+        return f"Instruct: {task_description}\nQuery:{query}"  # noqa: E231
     @cache_clean
     def create_embedding(

xinference/model/image/model_spec.json CHANGED Viewed

@@ -352,6 +352,75 @@
       "no_build_isolation": true
     }
   },
+  {
+    "version": 2,
+    "model_name": "Qwen-Image-Edit-2509",
+    "model_family": "stable_diffusion",
+    "model_ability": [
+      "image2image"
+    ],
+    "model_src": {
+      "huggingface": {
+        "model_id": "Qwen/Qwen-Image-Edit-2509",
+        "model_revision": "d3968ef930e841f4c73640fb8afa3b306a78167e",
+        "gguf_model_id": "QuantStack/Qwen-Image-Edit-2509-GGUF",
+        "gguf_quantizations": [
+          "Q2_K",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+      },
+      "modelscope": {
+        "model_id": "Qwen/Qwen-Image-Edit-2509",
+        "model_revision": "master",
+        "gguf_model_id": "QuantStack/Qwen-Image-Edit-2509-GGUF",
+        "gguf_quantizations": [
+          "Q2_K",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "gguf_model_file_name_template": "Qwen-Image-Edit-2509-{quantization}.gguf"
+      }
+    },
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder",
+      "torch_dtype": "bfloat16"
+    },
+    "default_generate_config": {
+      "true_cfg_scale": 4.0
+    },
+    "virtualenv": {
+      "packages": [
+        "git+https://github.com/huggingface/diffusers",
+        "peft>=0.17.0",
+        "#system_torch#",
+        "#system_numpy#"
+      ],
+      "no_build_isolation": true
+    }
+  },
   {
     "version": 2,
     "model_name": "sd3-medium",
@@ -824,13 +893,12 @@
         "deepspeed==0.12.3",
         "peft==0.4.0",
         "tiktoken==0.6.0",
-        "bitsandbytes==0.41.0",
-        "scikit-learn==1.2.2",
         "sentencepiece==0.1.99",
         "einops==0.6.1",
         "einops-exts==0.0.4",
         "timm==0.6.13",
-        "numpy==1.26.4"
+        "#system_numpy#",
+        "#system_torch#"
       ]
     },
     "model_src": {

xinference/model/image/stable_diffusion/core.py CHANGED Viewed

@@ -836,7 +836,7 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
     def image_to_image(
         self,
-        image: PIL.Image,
+        image: Union[PIL.Image.Image, List[PIL.Image.Image]],
         prompt: Optional[Union[str, List[str]]] = None,
         n: int = 1,
         size: Optional[str] = None,
@@ -856,7 +856,10 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if padding_image_to_multiple := kwargs.pop("padding_image_to_multiple", None):
             # Model like SD3 image to image requires image's height and width is times of 16
             # padding the image if specified
-            origin_x, origin_y = image.size
+            if isinstance(image, list):
+                origin_x, origin_y = image[0].size
+            else:
+                origin_x, origin_y = image.size
             kwargs["origin_size"] = (origin_x, origin_y)
             kwargs["is_padded"] = True
             image = self.pad_to_multiple(image, multiple=int(padding_image_to_multiple))
@@ -864,14 +867,20 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if size:
             width, height = map(int, re.split(r"[^\d]+", size))
             if padding_image_to_multiple:
-                width, height = image.size
+                if isinstance(image, list):
+                    width, height = image[0].size
+                else:
+                    width, height = image.size
             kwargs["width"] = width
             kwargs["height"] = height
         else:
             # SD3 image2image cannot accept width and height
             allow_width_height = model_accept_param(["width", "height"], model)
             if allow_width_height:
-                kwargs["width"], kwargs["height"] = image.size
+                if isinstance(image, list):
+                    kwargs["width"], kwargs["height"] = image[0].size
+                else:
+                    kwargs["width"], kwargs["height"] = image.size
         # generate config for lightning
         self._gen_config_for_lightning(kwargs)

xinference/model/llm/__init__.py CHANGED Viewed

@@ -159,6 +159,10 @@ def load_model_family_from_json(json_filename, target_families):
                     BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name][
                         "reasoning_end_tag"
                     ] = model_spec.reasoning_end_tag
+                if model_spec.tool_parser:
+                    BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name][
+                        "tool_parser"
+                    ] = model_spec.tool_parser
         # register model family
         if "chat" in model_spec.model_ability:

xinference/model/llm/core.py CHANGED Viewed

@@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Union
 from ...core.utils import parse_replica_model_uid
 from ...types import PeftModelConfig
 from .reasoning_parser import ReasoningParser
+from .tool_parsers import TOOL_PARSERS
 if TYPE_CHECKING:
     from .llm_family import LLMFamilyV2, LLMSpecV1
@@ -59,6 +60,7 @@ class LLM(abc.ABC):
         self.quantization = model_family.model_specs[0].quantization
         self.model_path = model_path
         self.reasoning_parser = None
+        self.tool_parser = None
         if args:
             raise ValueError(f"Unrecognized positional arguments: {args}")
         if kwargs:
@@ -171,6 +173,14 @@ class LLM(abc.ABC):
             enable_thinking=enable_thinking,
         )
+    def prepare_parse_tool_calls(self):
+        """Instantiate the tool parser named by the model family, if any.
+        Leaves ``self.tool_parser`` untouched when the family declares no
+        parser or the declared name is not registered in ``TOOL_PARSERS``.
+        """
+        parser_name = self.model_family.tool_parser
+        if parser_name is None or parser_name not in TOOL_PARSERS:
+            return
+        self.tool_parser = TOOL_PARSERS[parser_name]()
 # Context variable for passing per-request chat context (e.g., chat_template_kwargs).
 # This variable should be set at the beginning of each chat or stream_chat call.

xinference/model/llm/llama_cpp/core.py CHANGED Viewed

@@ -122,6 +122,7 @@ class XllamaCppModel(LLM, ChatModelMixin):
         self.prepare_parse_reasoning_content(
             reasoning_content, enable_thinking=enable_thinking
         )
+        self.prepare_parse_tool_calls()
         if os.path.isfile(self.model_path):
             # mostly passed from --model_path

xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

Potentially problematic release.

xinference 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl