xinference 0.15.4__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (67)
  1. xinference/__init__.py +0 -4
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +48 -0
  4. xinference/client/restful/restful_client.py +19 -0
  5. xinference/constants.py +4 -4
  6. xinference/core/chat_interface.py +5 -1
  7. xinference/core/image_interface.py +5 -1
  8. xinference/core/model.py +195 -34
  9. xinference/core/scheduler.py +10 -7
  10. xinference/core/utils.py +9 -0
  11. xinference/model/__init__.py +4 -0
  12. xinference/model/audio/chattts.py +25 -14
  13. xinference/model/audio/model_spec.json +1 -1
  14. xinference/model/audio/model_spec_modelscope.json +1 -1
  15. xinference/model/embedding/model_spec.json +1 -1
  16. xinference/model/image/core.py +59 -4
  17. xinference/model/image/model_spec.json +24 -3
  18. xinference/model/image/model_spec_modelscope.json +25 -3
  19. xinference/model/image/ocr/__init__.py +13 -0
  20. xinference/model/image/ocr/got_ocr2.py +76 -0
  21. xinference/model/image/scheduler/__init__.py +13 -0
  22. xinference/model/image/scheduler/flux.py +533 -0
  23. xinference/model/image/stable_diffusion/core.py +8 -34
  24. xinference/model/image/stable_diffusion/mlx.py +221 -0
  25. xinference/model/image/utils.py +39 -3
  26. xinference/model/llm/__init__.py +2 -0
  27. xinference/model/llm/llm_family.json +178 -1
  28. xinference/model/llm/llm_family_modelscope.json +119 -0
  29. xinference/model/llm/transformers/chatglm.py +104 -0
  30. xinference/model/llm/transformers/core.py +37 -111
  31. xinference/model/llm/transformers/deepseek_v2.py +0 -226
  32. xinference/model/llm/transformers/internlm2.py +3 -95
  33. xinference/model/llm/transformers/opt.py +68 -0
  34. xinference/model/llm/transformers/utils.py +4 -284
  35. xinference/model/llm/utils.py +2 -2
  36. xinference/model/llm/vllm/core.py +16 -1
  37. xinference/thirdparty/mlx/__init__.py +13 -0
  38. xinference/thirdparty/mlx/flux/__init__.py +15 -0
  39. xinference/thirdparty/mlx/flux/autoencoder.py +357 -0
  40. xinference/thirdparty/mlx/flux/clip.py +154 -0
  41. xinference/thirdparty/mlx/flux/datasets.py +75 -0
  42. xinference/thirdparty/mlx/flux/flux.py +247 -0
  43. xinference/thirdparty/mlx/flux/layers.py +302 -0
  44. xinference/thirdparty/mlx/flux/lora.py +76 -0
  45. xinference/thirdparty/mlx/flux/model.py +134 -0
  46. xinference/thirdparty/mlx/flux/sampler.py +56 -0
  47. xinference/thirdparty/mlx/flux/t5.py +244 -0
  48. xinference/thirdparty/mlx/flux/tokenizers.py +185 -0
  49. xinference/thirdparty/mlx/flux/trainer.py +98 -0
  50. xinference/thirdparty/mlx/flux/utils.py +179 -0
  51. xinference/utils.py +2 -3
  52. xinference/web/ui/build/asset-manifest.json +3 -3
  53. xinference/web/ui/build/index.html +1 -1
  54. xinference/web/ui/build/static/js/{main.e51a356d.js → main.b76aeeb7.js} +3 -3
  55. xinference/web/ui/build/static/js/main.b76aeeb7.js.map +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
  58. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/METADATA +49 -10
  59. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/RECORD +64 -44
  60. xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
  63. /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.b76aeeb7.js.LICENSE.txt} +0 -0
  64. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/LICENSE +0 -0
  65. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/WHEEL +0 -0
  66. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/entry_points.txt +0 -0
  67. {xinference-0.15.4.dist-info → xinference-0.16.1.dist-info}/top_level.txt +0 -0
xinference/model/audio/chattts.py

@@ -54,7 +54,11 @@ class ChatTTSModel:
         torch.set_float32_matmul_precision("high")
         self._model = ChatTTS.Chat()
         logger.info("Load ChatTTS model with kwargs: %s", self._kwargs)
-        self._model.load(source="custom", custom_path=self._model_path, **self._kwargs)
+        ok = self._model.load(
+            source="custom", custom_path=self._model_path, **self._kwargs
+        )
+        if not ok:
+            raise Exception(f"The ChatTTS model is not correct: {self._model_path}")
 
     def speech(
         self,
@@ -114,16 +118,15 @@ class ChatTTSModel:
             last_pos = 0
             with writer.open():
                 for it in iter:
-                    for itt in it:
-                        for chunk in itt:
-                            chunk = np.array([chunk]).transpose()
-                            writer.write_audio_chunk(i, torch.from_numpy(chunk))
-                            new_last_pos = out.tell()
-                            if new_last_pos != last_pos:
-                                out.seek(last_pos)
-                                encoded_bytes = out.read()
-                                yield encoded_bytes
-                                last_pos = new_last_pos
+                    for chunk in it:
+                        chunk = np.array([chunk]).transpose()
+                        writer.write_audio_chunk(i, torch.from_numpy(chunk))
+                        new_last_pos = out.tell()
+                        if new_last_pos != last_pos:
+                            out.seek(last_pos)
+                            encoded_bytes = out.read()
+                            yield encoded_bytes
+                            last_pos = new_last_pos
 
         return _generator()
     else:
@@ -131,7 +134,15 @@ class ChatTTSModel:
 
         # Save the generated audio
         with BytesIO() as out:
-            torchaudio.save(
-                out, torch.from_numpy(wavs[0]), 24000, format=response_format
-            )
+            try:
+                torchaudio.save(
+                    out,
+                    torch.from_numpy(wavs[0]).unsqueeze(0),
+                    24000,
+                    format=response_format,
+                )
+            except:
+                torchaudio.save(
+                    out, torch.from_numpy(wavs[0]), 24000, format=response_format
+                )
             return out.getvalue()
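
The unsqueeze(0) in the new code reflects that torchaudio.save expects a 2-D
(channels, frames) tensor, while ChatTTS emits a 1-D mono waveform; the bare
except falls back to the old call for torchaudio versions that still accept the
1-D form. A minimal shape sketch (illustrative only, not part of the diff):

    import torch

    wav = torch.zeros(24000)    # 1-D mono waveform: one second at 24 kHz
    wav2d = wav.unsqueeze(0)    # shape (1, 24000), i.e. (channels, frames)
    assert wav2d.shape == (1, 24000)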
xinference/model/audio/model_spec.json

@@ -127,7 +127,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
-    "model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
+    "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
     "model_ability": "text-to-audio",
     "multilingual": true
   },
xinference/model/audio/model_spec_modelscope.json

@@ -42,7 +42,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_hub": "modelscope",
-    "model_id": "pzc163/chatTTS",
+    "model_id": "AI-ModelScope/ChatTTS",
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
xinference/model/embedding/model_spec.json

@@ -233,7 +233,7 @@
   },
   {
     "model_name": "gte-Qwen2",
-    "dimensions": 3584,
+    "dimensions": 4096,
     "max_tokens": 32000,
     "language": ["zh", "en"],
     "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
xinference/model/image/core.py

@@ -11,17 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import collections.abc
 import logging
 import os
+import platform
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
+from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
+from .stable_diffusion.mlx import MLXDiffusionModel
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +49,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_hub: str = "huggingface"
     model_ability: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
+    default_model_config: Optional[dict] = {}
     default_generate_config: Optional[dict] = {}
 
 
@@ -180,6 +185,28 @@ def get_cache_status(
     return valid_model_revision(meta_path, model_spec.model_revision)
 
 
+def create_ocr_model_instance(
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_spec: ImageModelFamilyV1,
+    model_path: Optional[str] = None,
+    **kwargs,
+) -> Tuple[GotOCR2Model, ImageModelDescription]:
+    if not model_path:
+        model_path = cache(model_spec)
+    model = GotOCR2Model(
+        model_uid,
+        model_path,
+        model_spec=model_spec,
+        **kwargs,
+    )
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
+    return model, model_description
+
+
 def create_image_model_instance(
     subpool_addr: str,
     devices: List[str],
@@ -189,8 +216,26 @@ def create_image_model_instance(
     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
     model_path: Optional[str] = None,
     **kwargs,
-) -> Tuple[DiffusionModel, ImageModelDescription]:
+) -> Tuple[
+    Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
+]:
     model_spec = match_diffusion(model_name, download_hub)
+    if model_spec.model_ability and "ocr" in model_spec.model_ability:
+        return create_ocr_model_instance(
+            subpool_addr=subpool_addr,
+            devices=devices,
+            model_uid=model_uid,
+            model_name=model_name,
+            model_spec=model_spec,
+            model_path=model_path,
+            **kwargs,
+        )
+
+    # use default model config
+    model_default_config = (model_spec.default_model_config or {}).copy()
+    model_default_config.update(kwargs)
+    kwargs = model_default_config
+
     controlnet = kwargs.get("controlnet")
     # Handle controlnet
     if controlnet is not None:
@@ -232,10 +277,20 @@
         lora_load_kwargs = None
         lora_fuse_kwargs = None
 
-    model = DiffusionModel(
+    if (
+        platform.system() == "Darwin"
+        and "arm" in platform.machine().lower()
+        and model_name in MLXDiffusionModel.supported_models
+    ):
+        # Mac with M series silicon chips
+        model_cls = MLXDiffusionModel
+    else:
+        model_cls = DiffusionModel  # type: ignore
+
+    model = model_cls(
         model_uid,
         model_path,
-        lora_model_paths=lora_model,
+        lora_model=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         model_spec=model_spec,
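
Two behaviors in this hunk are worth spelling out: the spec-level
default_model_config seeds the kwargs and anything the caller passes wins, and
the MLX backend is chosen only on Apple Silicon (platform.system() == "Darwin"
with a machine string such as "arm64"). A small sketch of the merge precedence,
with made-up values:

    # Illustrative only, not from the diff: caller kwargs override spec defaults.
    spec_defaults = {"quantize": True, "quantize_text_encoder": "text_encoder_2"}
    caller_kwargs = {"quantize": False}  # user explicitly disables quantization

    merged = spec_defaults.copy()
    merged.update(caller_kwargs)
    assert merged == {"quantize": False, "quantize_text_encoder": "text_encoder_2"}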
xinference/model/image/model_spec.json

@@ -8,7 +8,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "FLUX.1-dev",
@@ -19,7 +23,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "sd3-medium",
@@ -30,7 +38,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_3"
+    }
   },
   {
     "model_name": "sd-turbo",
@@ -178,5 +190,14 @@
     "model_ability": [
       "inpainting"
     ]
+  },
+  {
+    "model_name": "GOT-OCR2_0",
+    "model_family": "ocr",
+    "model_id": "stepfun-ai/GOT-OCR2_0",
+    "model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
+    "model_ability": [
+      "ocr"
+    ]
   }
 ]
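
With these defaults in place, the FLUX.1 and sd3-medium models load quantized
out of the box, and a caller can still override any default at launch time
because user kwargs win in the merge shown in core.py above. A hedged sketch
using the RESTful client (the endpoint URL is an assumption, and quantize
reaches the model via launch_model's extra kwargs):

    # Illustrative sketch, not from the diff; assumes a local xinference server.
    from xinference.client import Client

    client = Client("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="FLUX.1-schnell",
        model_type="image",
        quantize=False,  # overrides the spec default "quantize": true
    )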
xinference/model/image/model_spec_modelscope.json

@@ -9,7 +9,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "FLUX.1-dev",
@@ -21,7 +25,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_2"
+    }
   },
   {
     "model_name": "sd3-medium",
@@ -33,7 +41,11 @@
       "text2image",
       "image2image",
       "inpainting"
-    ]
+    ],
+    "default_model_config": {
+      "quantize": true,
+      "quantize_text_encoder": "text_encoder_3"
+    }
   },
   {
     "model_name": "sd-turbo",
@@ -148,5 +160,15 @@
         "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
       }
     ]
+  },
+  {
+    "model_name": "GOT-OCR2_0",
+    "model_family": "ocr",
+    "model_id": "stepfun-ai/GOT-OCR2_0",
+    "model_revision": "master",
+    "model_hub": "modelscope",
+    "model_ability": [
+      "ocr"
+    ]
   }
 ]
xinference/model/image/ocr/__init__.py (new file)

@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
xinference/model/image/ocr/got_ocr2.py (new file)

@@ -0,0 +1,76 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+import PIL.Image
+
+if TYPE_CHECKING:
+    from ..core import ImageModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class GotOCR2Model:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: Optional[str] = None,
+        device: Optional[str] = None,
+        model_spec: Optional["ImageModelFamilyV1"] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._device = device
+        # model info when loading
+        self._model = None
+        self._tokenizer = None
+        # info
+        self._model_spec = model_spec
+        self._abilities = model_spec.model_ability or []  # type: ignore
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._abilities
+
+    def load(self):
+        from transformers import AutoModel, AutoTokenizer
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self._model_path, trust_remote_code=True
+        )
+        model = AutoModel.from_pretrained(
+            self._model_path,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            device_map="cuda",
+            use_safetensors=True,
+            pad_token_id=self._tokenizer.eos_token_id,
+        )
+        self._model = model.eval().cuda()
+
+    def ocr(
+        self,
+        image: PIL.Image,
+        **kwargs,
+    ):
+        logger.info("Got OCR 2.0 kwargs: %s", kwargs)
+        if "ocr_type" not in kwargs:
+            kwargs["ocr_type"] = "ocr"
+        assert self._model is not None
+        # This chat API limits the max new tokens inside.
+        return self._model.chat(self._tokenizer, image, gradio_input=True, **kwargs)
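
For reference, a minimal usage sketch of the class above (the model path and
image file are hypothetical; load() hard-codes device_map="cuda", so a CUDA GPU
is required):

    # Hypothetical local usage of GotOCR2Model as defined in this diff.
    from PIL import Image

    model = GotOCR2Model("got-ocr2-demo", "/path/to/GOT-OCR2_0")
    model.load()  # expects an already-downloaded checkpoint at model_path
    text = model.ocr(Image.open("sample.png"))  # defaults to ocr_type="ocr"
    print(text)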
xinference/model/image/scheduler/__init__.py (new file)

@@ -0,0 +1,13 @@
+# Copyright 2022-2024 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.