xinference-0.16.0-py3-none-any.whl → xinference-0.16.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as possibly problematic.

Files changed (62)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +48 -0
  3. xinference/client/restful/restful_client.py +19 -0
  4. xinference/constants.py +1 -0
  5. xinference/core/chat_interface.py +5 -1
  6. xinference/core/image_interface.py +5 -1
  7. xinference/core/model.py +106 -16
  8. xinference/core/scheduler.py +1 -1
  9. xinference/core/worker.py +3 -1
  10. xinference/deploy/supervisor.py +0 -4
  11. xinference/model/audio/chattts.py +25 -14
  12. xinference/model/audio/core.py +6 -2
  13. xinference/model/audio/model_spec.json +1 -1
  14. xinference/model/audio/model_spec_modelscope.json +1 -1
  15. xinference/model/core.py +3 -1
  16. xinference/model/embedding/core.py +6 -2
  17. xinference/model/embedding/model_spec.json +1 -1
  18. xinference/model/image/core.py +65 -6
  19. xinference/model/image/model_spec.json +24 -3
  20. xinference/model/image/model_spec_modelscope.json +25 -3
  21. xinference/model/image/ocr/__init__.py +13 -0
  22. xinference/model/image/ocr/got_ocr2.py +79 -0
  23. xinference/model/image/scheduler/flux.py +1 -1
  24. xinference/model/image/stable_diffusion/core.py +2 -3
  25. xinference/model/image/stable_diffusion/mlx.py +221 -0
  26. xinference/model/llm/__init__.py +33 -0
  27. xinference/model/llm/core.py +3 -1
  28. xinference/model/llm/llm_family.json +9 -0
  29. xinference/model/llm/llm_family.py +68 -2
  30. xinference/model/llm/llm_family_modelscope.json +11 -0
  31. xinference/model/llm/llm_family_openmind_hub.json +1359 -0
  32. xinference/model/rerank/core.py +9 -1
  33. xinference/model/utils.py +7 -0
  34. xinference/model/video/core.py +6 -2
  35. xinference/thirdparty/mlx/__init__.py +13 -0
  36. xinference/thirdparty/mlx/flux/__init__.py +15 -0
  37. xinference/thirdparty/mlx/flux/autoencoder.py +357 -0
  38. xinference/thirdparty/mlx/flux/clip.py +154 -0
  39. xinference/thirdparty/mlx/flux/datasets.py +75 -0
  40. xinference/thirdparty/mlx/flux/flux.py +247 -0
  41. xinference/thirdparty/mlx/flux/layers.py +302 -0
  42. xinference/thirdparty/mlx/flux/lora.py +76 -0
  43. xinference/thirdparty/mlx/flux/model.py +134 -0
  44. xinference/thirdparty/mlx/flux/sampler.py +56 -0
  45. xinference/thirdparty/mlx/flux/t5.py +244 -0
  46. xinference/thirdparty/mlx/flux/tokenizers.py +185 -0
  47. xinference/thirdparty/mlx/flux/trainer.py +98 -0
  48. xinference/thirdparty/mlx/flux/utils.py +179 -0
  49. xinference/web/ui/build/asset-manifest.json +3 -3
  50. xinference/web/ui/build/index.html +1 -1
  51. xinference/web/ui/build/static/js/{main.f7da0140.js → main.2f269bb3.js} +3 -3
  52. xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
  54. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/METADATA +16 -9
  55. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/RECORD +60 -42
  56. xinference/web/ui/build/static/js/main.f7da0140.js.map +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/070d8c6b3b0f3485c6d3885f0b6bbfdf9643e088a468acbd5d596f2396071c16.json +0 -1
  58. /xinference/web/ui/build/static/js/{main.f7da0140.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
  59. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/LICENSE +0 -0
  60. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/WHEEL +0 -0
  61. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/entry_points.txt +0 -0
  62. {xinference-0.16.0.dist-info → xinference-0.16.2.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-10-18T12:49:02+0800",
+ "date": "2024-11-01T17:56:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "5f7dea44832a1c41f887b9a01377191894550057",
- "version": "0.16.0"
+ "full-revisionid": "67e97ab485b539dc7a208825bee0504acc37044e",
+ "version": "0.16.2"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -567,6 +567,16 @@ class RESTfulAPI:
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/images/ocr",
+            self.create_ocr,
+            methods=["POST"],
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         # SD WebUI API
         self._router.add_api_route(
             "/sdapi/v1/options",
@@ -1754,6 +1764,44 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def create_ocr(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            im = Image.open(image.file)
+            text = await model_ref.ocr(
+                image=im,
+                **parsed_kwargs,
+            )
+            return Response(content=text, media_type="text/plain")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_flexible_infer(self, request: Request) -> Response:
         payload = await request.json()
 
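The new route accepts a multipart form with a "model" field, an "image" file and an optional JSON-encoded "kwargs" field, and answers with the recognized text as text/plain. A minimal sketch of exercising it directly over HTTP; the endpoint address, file name and model uid below are placeholders, not values taken from this diff:

import json

import requests

# Placeholders: adjust the endpoint and the uid of the launched OCR-capable model.
url = "http://127.0.0.1:9997/v1/images/ocr"
with open("sample.png", "rb") as f:
    files = [
        ("model", (None, "my-ocr-model")),                    # form field: model uid
        ("kwargs", (None, json.dumps({}))),                   # optional extra kwargs as a JSON string
        ("image", ("image", f, "application/octet-stream")),  # the image file itself
    ]
    resp = requests.post(url, files=files)

resp.raise_for_status()
print(resp.text)  # the endpoint responds with plain text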
xinference/client/restful/restful_client.py CHANGED
@@ -369,6 +369,25 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data
 
+    def ocr(self, image: Union[str, bytes], **kwargs):
+        url = f"{self._base_url}/v1/images/ocr"
+        params = {
+            "model": self._model_uid,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to ocr the images, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+
 
 class RESTfulVideoModelHandle(RESTfulModelHandle):
     def text_to_video(
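With the handle method above, the same call can be made through the Python client. A sketch assuming an image model exposing the "ocr" ability is already launched; the server address and model uid are placeholders:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")   # placeholder endpoint
model = client.get_model("my-ocr-model")   # placeholder uid of a launched OCR model

with open("sample.png", "rb") as f:
    result = model.ocr(f.read())            # forwarded as multipart to /v1/images/ocr
print(result)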
xinference/constants.py CHANGED
@@ -39,6 +39,7 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")
     # In multi-tenant mode,
     # gradio's temporary files are stored in their respective home directories,
     # to prevent insufficient permissions
xinference/core/chat_interface.py CHANGED
@@ -74,7 +74,11 @@ class GradioInterface:
         # Gradio initiates the queue during a startup event, but since the app has already been
         # started, that event will not run, so manually invoke the startup events.
         # See: https://github.com/gradio-app/gradio/issues/5228
-        interface.startup_events()
+        try:
+            interface.run_startup_events()
+        except AttributeError:
+            # compatibility
+            interface.startup_events()
         favicon_path = os.path.join(
             os.path.dirname(os.path.abspath(__file__)),
             os.path.pardir,
xinference/core/image_interface.py CHANGED
@@ -63,7 +63,11 @@ class ImageInterface:
         # Gradio initiates the queue during a startup event, but since the app has already been
         # started, that event will not run, so manually invoke the startup events.
         # See: https://github.com/gradio-app/gradio/issues/5228
-        interface.startup_events()
+        try:
+            interface.run_startup_events()
+        except AttributeError:
+            # compatibility
+            interface.startup_events()
         favicon_path = os.path.join(
             os.path.dirname(os.path.abspath(__file__)),
             os.path.pardir,
xinference/core/model.py CHANGED
@@ -17,10 +17,10 @@ import functools
 import inspect
 import json
 import os
+import queue
 import time
 import types
 import uuid
-import weakref
 from asyncio.queues import Queue
 from asyncio.tasks import wait_for
 from concurrent.futures import Future as ConcurrentFuture
@@ -32,7 +32,6 @@ from typing import (
     Callable,
     Dict,
     Generator,
-    Iterator,
     List,
     Optional,
     Union,
@@ -209,9 +208,8 @@ class ModelActor(xo.StatelessActor):
             model_description.to_dict() if model_description else {}
         )
         self._request_limits = request_limits
-
-        self._generators: Dict[str, Union[Iterator, AsyncGenerator]] = {}
-        self._current_generator = lambda: None
+        self._pending_requests: asyncio.Queue = asyncio.Queue()
+        self._handle_pending_requests_task = None
         self._lock = (
             None
             if isinstance(
@@ -237,6 +235,10 @@ class ModelActor(xo.StatelessActor):
     async def __post_create__(self):
         self._loop = asyncio.get_running_loop()
 
+        self._handle_pending_requests_task = asyncio.create_task(
+            self._handle_pending_requests()
+        )
+
         if self.allow_batching():
             from .scheduler import SchedulerActor
 
@@ -474,6 +476,43 @@ class ModelActor(xo.StatelessActor):
         )
         await asyncio.gather(*coros)
 
+    async def _handle_pending_requests(self):
+        logger.info("Start requests handler.")
+        while True:
+            gen, stream_out, stop = await self._pending_requests.get()
+
+            async def _async_wrapper(_gen):
+                try:
+                    # anext is only available for Python >= 3.10
+                    return await _gen.__anext__()  # noqa: F821
+                except StopAsyncIteration:
+                    return stop
+
+            def _wrapper(_gen):
+                # Avoid issue: https://github.com/python/cpython/issues/112182
+                try:
+                    return next(_gen)
+                except StopIteration:
+                    return stop
+
+            while True:
+                try:
+                    if inspect.isgenerator(gen):
+                        r = await asyncio.to_thread(_wrapper, gen)
+                    elif inspect.isasyncgen(gen):
+                        r = await _async_wrapper(gen)
+                    else:
+                        raise Exception(
+                            f"The generator {gen} should be a generator or an async generator, "
+                            f"but a {type(gen)} is got."
+                        )
+                    stream_out.put_nowait(r)
+                    if r is not stop:
+                        continue
+                except Exception:
+                    logger.exception("stream encountered an error.")
+                break
+
     async def _call_wrapper_json(self, fn: Callable, *args, **kwargs):
         return await self._call_wrapper("json", fn, *args, **kwargs)
 
@@ -487,6 +526,13 @@ class ModelActor(xo.StatelessActor):
                 ret = await fn(*args, **kwargs)
             else:
                 ret = await asyncio.to_thread(fn, *args, **kwargs)
+
+            if inspect.isgenerator(ret):
+                gen = self._to_generator(output_type, ret)
+                return gen
+            if inspect.isasyncgen(ret):
+                gen = self._to_async_gen(output_type, ret)
+                return gen
         else:
             async with self._lock:
                 if inspect.iscoroutinefunction(fn):
@@ -494,17 +540,40 @@ class ModelActor(xo.StatelessActor):
                 else:
                     ret = await asyncio.to_thread(fn, *args, **kwargs)
 
-        if self._lock is not None and self._current_generator():
-            raise Exception("Parallel generation is not supported by llama-cpp-python.")
+            stream_out: Union[queue.Queue, asyncio.Queue]
+
+            if inspect.isgenerator(ret):
+                gen = self._to_generator(output_type, ret)
+                stream_out = queue.Queue()
+                stop = object()
+                self._pending_requests.put_nowait((gen, stream_out, stop))
+
+                def _stream_out_generator():
+                    while True:
+                        o = stream_out.get()
+                        if o is stop:
+                            break
+                        else:
+                            yield o
+
+                return _stream_out_generator()
+
+            if inspect.isasyncgen(ret):
+                gen = self._to_async_gen(output_type, ret)
+                stream_out = asyncio.Queue()
+                stop = object()
+                self._pending_requests.put_nowait((gen, stream_out, stop))
+
+                async def _stream_out_async_gen():
+                    while True:
+                        o = await stream_out.get()
+                        if o is stop:
+                            break
+                        else:
+                            yield o
+
+                return _stream_out_async_gen()
 
-        if inspect.isgenerator(ret):
-            gen = self._to_generator(output_type, ret)
-            self._current_generator = weakref.ref(gen)
-            return gen
-        if inspect.isasyncgen(ret):
-            gen = self._to_async_gen(output_type, ret)
-            self._current_generator = weakref.ref(gen)
-            return gen
 
         if output_type == "json":
             return await asyncio.to_thread(json_dumps, ret)
@@ -592,7 +661,6 @@ class ModelActor(xo.StatelessActor):
                 prompt_or_messages, queue, call_ability, *args, **kwargs
             )
             gen = self._to_async_gen("json", ret)
-            self._current_generator = weakref.ref(gen)
             return gen
         else:
             from .scheduler import XINFERENCE_NON_STREAMING_ABORT_FLAG
@@ -953,6 +1021,25 @@ class ModelActor(xo.StatelessActor):
                 f"Model {self._model.model_spec} is not for creating image."
             )
 
+    @log_async(
+        logger=logger,
+        ignore_kwargs=["image"],
+    )
+    async def ocr(
+        self,
+        image: "PIL.Image",
+        *args,
+        **kwargs,
+    ):
+        if hasattr(self._model, "ocr"):
+            return await self._call_wrapper_json(
+                self._model.ocr,
+                image,
+                *args,
+                **kwargs,
+            )
+        raise AttributeError(f"Model {self._model.model_spec} is not for ocr.")
+
     @request_limit
     @log_async(logger=logger, ignore_kwargs=["image"])
     async def infer(
@@ -994,3 +1081,6 @@ class ModelActor(xo.StatelessActor):
     async def record_metrics(self, name, op, kwargs):
         worker_ref = await self._get_worker_ref()
         await worker_ref.record_metrics(name, op, kwargs)
+
+    async def get_pending_requests_count(self):
+        return self._pending_requests.qsize()
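The replacement of the weakref-based _current_generator with a _pending_requests queue follows a producer/consumer shape: a single background task drains queued generators into per-request output queues, ending each with a sentinel object, so streaming requests are serialized rather than interleaved on the underlying model. A simplified standalone sketch of that shape, for illustration only (not the xinference code; it keeps just the async-generator branch and omits error handling):

import asyncio


async def handler(pending: asyncio.Queue) -> None:
    # Drain one queued generator at a time into its own output queue.
    while True:
        gen, out, stop = await pending.get()
        async for item in gen:
            out.put_nowait(item)
        out.put_nowait(stop)  # sentinel: this stream is finished


async def stream(pending: asyncio.Queue, gen):
    # Enqueue the generator, then relay items until the sentinel arrives.
    out: asyncio.Queue = asyncio.Queue()
    stop = object()
    pending.put_nowait((gen, out, stop))
    while True:
        item = await out.get()
        if item is stop:
            break
        yield item


async def main() -> None:
    async def tokens(prefix: str, n: int):
        for i in range(n):
            await asyncio.sleep(0.01)
            yield f"{prefix}-{i}"

    pending: asyncio.Queue = asyncio.Queue()
    asyncio.create_task(handler(pending))
    async for chunk in stream(pending, tokens("req", 3)):
        print(chunk)


asyncio.run(main())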
xinference/core/scheduler.py CHANGED
@@ -79,7 +79,7 @@ class InferenceRequest:
         # For tool call
         self.tools = None
         # Currently, for storing tool call streaming results.
-        self.outputs: List[str] = []
+        self.outputs: List[str] = []  # type: ignore
         # inference results,
         # it is a list type because when stream=True,
         # self.completion contains all the results in a decode round.
xinference/core/worker.py CHANGED
@@ -785,7 +785,9 @@ class WorkerActor(xo.StatelessActor):
         peft_model_config: Optional[PeftModelConfig] = None,
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
-        download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+        download_hub: Optional[
+            Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+        ] = None,
         model_path: Optional[str] = None,
         **kwargs,
     ):
xinference/deploy/supervisor.py CHANGED
@@ -31,10 +31,6 @@ from .utils import health_check
 
 logger = logging.getLogger(__name__)
 
-from ..model import _install as install_model
-
-install_model()
-
 
 async def _start_supervisor(address: str, logging_conf: Optional[Dict] = None):
     logging.config.dictConfig(logging_conf)  # type: ignore
xinference/model/audio/chattts.py CHANGED
@@ -54,7 +54,11 @@ class ChatTTSModel:
         torch.set_float32_matmul_precision("high")
         self._model = ChatTTS.Chat()
         logger.info("Load ChatTTS model with kwargs: %s", self._kwargs)
-        self._model.load(source="custom", custom_path=self._model_path, **self._kwargs)
+        ok = self._model.load(
+            source="custom", custom_path=self._model_path, **self._kwargs
+        )
+        if not ok:
+            raise Exception(f"The ChatTTS model is not correct: {self._model_path}")
 
     def speech(
         self,
@@ -114,16 +118,15 @@ class ChatTTSModel:
                     last_pos = 0
                     with writer.open():
                         for it in iter:
-                            for itt in it:
-                                for chunk in itt:
-                                    chunk = np.array([chunk]).transpose()
-                                    writer.write_audio_chunk(i, torch.from_numpy(chunk))
-                                    new_last_pos = out.tell()
-                                    if new_last_pos != last_pos:
-                                        out.seek(last_pos)
-                                        encoded_bytes = out.read()
-                                        yield encoded_bytes
-                                        last_pos = new_last_pos
+                            for chunk in it:
+                                chunk = np.array([chunk]).transpose()
+                                writer.write_audio_chunk(i, torch.from_numpy(chunk))
+                                new_last_pos = out.tell()
+                                if new_last_pos != last_pos:
+                                    out.seek(last_pos)
+                                    encoded_bytes = out.read()
+                                    yield encoded_bytes
+                                    last_pos = new_last_pos
 
             return _generator()
         else:
@@ -131,7 +134,15 @@ class ChatTTSModel:
 
             # Save the generated audio
             with BytesIO() as out:
-                torchaudio.save(
-                    out, torch.from_numpy(wavs[0]), 24000, format=response_format
-                )
+                try:
+                    torchaudio.save(
+                        out,
+                        torch.from_numpy(wavs[0]).unsqueeze(0),
+                        24000,
+                        format=response_format,
+                    )
+                except:
+                    torchaudio.save(
+                        out, torch.from_numpy(wavs[0]), 24000, format=response_format
+                    )
                 return out.getvalue()
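The try/except around torchaudio.save covers the case where the generated waveform is one-dimensional: torchaudio expects a (channels, frames) tensor, so a mono 1-D array needs an extra channel dimension first. A tiny illustration with synthetic data (not ChatTTS output):

import numpy as np
import torch
import torchaudio

sr = 24000
# 1 second of a 440 Hz sine wave, shape (24000,)
wav = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr).astype(np.float32)

# unsqueeze(0) turns the 1-D waveform into the (channels, frames) layout torchaudio expects.
torchaudio.save("tone.wav", torch.from_numpy(wav).unsqueeze(0), sr)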
xinference/model/audio/core.py CHANGED
@@ -100,7 +100,9 @@ def generate_audio_description(
 
 def match_audio(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> AudioModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
@@ -152,7 +154,9 @@ def create_audio_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
xinference/model/audio/model_spec.json CHANGED
@@ -127,7 +127,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_id": "2Noise/ChatTTS",
-        "model_revision": "ce5913842aebd78e4a01a02d47244b8d62ac4ee3",
+        "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
         "model_ability": "text-to-audio",
         "multilingual": true
     },
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -42,7 +42,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_hub": "modelscope",
-        "model_id": "pzc163/chatTTS",
+        "model_id": "AI-ModelScope/ChatTTS",
         "model_revision": "master",
         "model_ability": "text-to-audio",
         "multilingual": true
xinference/model/core.py CHANGED
@@ -55,7 +55,9 @@ def create_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
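These signature changes thread the new "openmind_hub" value for download_hub through the whole model-loading path (audio, embedding, image, LLM and video). A hedged usage sketch from the client side, assuming launch_model forwards download_hub the same way it already does for "modelscope" and "csghub"; the endpoint and model name are placeholders:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")   # placeholder endpoint
uid = client.launch_model(
    model_name="bge-small-zh-v1.5",        # placeholder model name
    model_type="embedding",
    download_hub="openmind_hub",           # new hub option added in this release
)
print(uid)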
xinference/model/embedding/core.py CHANGED
@@ -433,7 +433,9 @@ class EmbeddingModel:
 
 def match_embedding(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> EmbeddingModelSpec:
     from ..utils import download_from_modelscope
     from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
@@ -469,7 +471,9 @@ def create_embedding_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
xinference/model/embedding/model_spec.json CHANGED
@@ -233,7 +233,7 @@
     },
     {
         "model_name": "gte-Qwen2",
-        "dimensions": 3584,
+        "dimensions": 4096,
         "max_tokens": 32000,
         "language": ["zh", "en"],
         "model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
xinference/model/image/core.py CHANGED
@@ -11,17 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import collections.abc
 import logging
 import os
+import platform
 from collections import defaultdict
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import PeftModelConfig
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import valid_model_revision
+from .ocr.got_ocr2 import GotOCR2Model
 from .stable_diffusion.core import DiffusionModel
+from .stable_diffusion.mlx import MLXDiffusionModel
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +49,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_hub: str = "huggingface"
     model_ability: Optional[List[str]]
     controlnet: Optional[List["ImageModelFamilyV1"]]
+    default_model_config: Optional[dict] = {}
     default_generate_config: Optional[dict] = {}
 
 
@@ -120,7 +125,9 @@ def generate_image_description(
 
 def match_diffusion(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> ImageModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
@@ -180,17 +187,59 @@ def get_cache_status(
     return valid_model_revision(meta_path, model_spec.model_revision)
 
 
+def create_ocr_model_instance(
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_spec: ImageModelFamilyV1,
+    model_path: Optional[str] = None,
+    **kwargs,
+) -> Tuple[GotOCR2Model, ImageModelDescription]:
+    if not model_path:
+        model_path = cache(model_spec)
+    model = GotOCR2Model(
+        model_uid,
+        model_path,
+        model_spec=model_spec,
+        **kwargs,
+    )
+    model_description = ImageModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
+    return model, model_description
+
+
 def create_image_model_instance(
     subpool_addr: str,
     devices: List[str],
     model_uid: str,
     model_name: str,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
-) -> Tuple[DiffusionModel, ImageModelDescription]:
+) -> Tuple[
+    Union[DiffusionModel, MLXDiffusionModel, GotOCR2Model], ImageModelDescription
+]:
     model_spec = match_diffusion(model_name, download_hub)
+    if model_spec.model_ability and "ocr" in model_spec.model_ability:
+        return create_ocr_model_instance(
+            subpool_addr=subpool_addr,
+            devices=devices,
+            model_uid=model_uid,
+            model_name=model_name,
+            model_spec=model_spec,
+            model_path=model_path,
+            **kwargs,
+        )
+
+    # use default model config
+    model_default_config = (model_spec.default_model_config or {}).copy()
+    model_default_config.update(kwargs)
+    kwargs = model_default_config
+
     controlnet = kwargs.get("controlnet")
     # Handle controlnet
     if controlnet is not None:
@@ -232,10 +281,20 @@ def create_image_model_instance(
     lora_load_kwargs = None
     lora_fuse_kwargs = None
 
-    model = DiffusionModel(
+    if (
+        platform.system() == "Darwin"
+        and "arm" in platform.machine().lower()
+        and model_name in MLXDiffusionModel.supported_models
+    ):
+        # Mac with M series silicon chips
+        model_cls = MLXDiffusionModel
+    else:
+        model_cls = DiffusionModel  # type: ignore
+
+    model = model_cls(
         model_uid,
         model_path,
-        lora_model_paths=lora_model,
+        lora_model=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
         model_spec=model_spec,
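The dispatch above only selects the MLX backend on Apple-silicon Macs, and only for model names that MLXDiffusionModel declares support for; the platform gate itself is plain standard library and can be checked in isolation (illustrative only, no xinference import needed):

import platform

# Mirrors the condition in create_image_model_instance: macOS on an ARM (M-series) CPU.
on_apple_silicon = platform.system() == "Darwin" and "arm" in platform.machine().lower()
print("would use MLXDiffusionModel" if on_apple_silicon else "would use DiffusionModel")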