xinference 0.14.0.post1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (50)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +54 -0
  3. xinference/client/handlers.py +0 -3
  4. xinference/client/restful/restful_client.py +51 -134
  5. xinference/constants.py +1 -0
  6. xinference/core/chat_interface.py +1 -4
  7. xinference/core/image_interface.py +33 -5
  8. xinference/core/model.py +28 -2
  9. xinference/core/supervisor.py +37 -0
  10. xinference/core/worker.py +128 -84
  11. xinference/deploy/cmdline.py +1 -4
  12. xinference/model/audio/core.py +11 -3
  13. xinference/model/audio/funasr.py +114 -0
  14. xinference/model/audio/model_spec.json +20 -0
  15. xinference/model/audio/model_spec_modelscope.json +21 -0
  16. xinference/model/audio/whisper.py +1 -1
  17. xinference/model/core.py +12 -0
  18. xinference/model/image/core.py +3 -4
  19. xinference/model/image/model_spec.json +41 -13
  20. xinference/model/image/model_spec_modelscope.json +30 -10
  21. xinference/model/image/stable_diffusion/core.py +53 -2
  22. xinference/model/llm/__init__.py +2 -0
  23. xinference/model/llm/llm_family.json +83 -1
  24. xinference/model/llm/llm_family_modelscope.json +85 -1
  25. xinference/model/llm/pytorch/core.py +1 -0
  26. xinference/model/llm/pytorch/minicpmv26.py +247 -0
  27. xinference/model/llm/sglang/core.py +72 -34
  28. xinference/model/llm/vllm/core.py +38 -0
  29. xinference/model/video/__init__.py +62 -0
  30. xinference/model/video/core.py +178 -0
  31. xinference/model/video/diffusers.py +180 -0
  32. xinference/model/video/model_spec.json +11 -0
  33. xinference/model/video/model_spec_modelscope.json +12 -0
  34. xinference/types.py +10 -24
  35. xinference/web/ui/build/asset-manifest.json +3 -3
  36. xinference/web/ui/build/index.html +1 -1
  37. xinference/web/ui/build/static/js/{main.ef2a203a.js → main.17ca0398.js} +3 -3
  38. xinference/web/ui/build/static/js/main.17ca0398.js.map +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +1 -0
  41. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/METADATA +14 -8
  42. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/RECORD +47 -40
  43. xinference/web/ui/build/static/js/main.ef2a203a.js.map +0 -1
  44. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +0 -1
  45. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +0 -1
  46. /xinference/web/ui/build/static/js/{main.ef2a203a.js.LICENSE.txt → main.17ca0398.js.LICENSE.txt} +0 -0
  47. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/LICENSE +0 -0
  48. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/WHEEL +0 -0
  49. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/entry_points.txt +0 -0
  50. {xinference-0.14.0.post1.dist-info → xinference-0.14.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/pytorch/minicpmv26.py (new file)
@@ -0,0 +1,247 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import json
+import logging
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from io import BytesIO
+from typing import Dict, Iterator, List, Optional, Union
+
+import requests
+import torch
+from PIL import Image
+
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    Completion,
+    CompletionChoice,
+    CompletionChunk,
+    CompletionUsage,
+)
+from ...utils import select_device
+from ..llm_family import LLMFamilyV1, LLMSpecV1
+from .core import PytorchChatModel, PytorchGenerateConfig
+
+logger = logging.getLogger(__name__)
+
+
+class MiniCPMV26Model(PytorchChatModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._device = None
+        self._tokenizer = None
+        self._model = None
+
+    @classmethod
+    def match(
+        cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "MiniCPM-V-2.6".lower() in family.lower():
+            return True
+        return False
+
+    def _get_model_class(self):
+        from transformers import AutoModel
+
+        return AutoModel
+
+    def load(self, **kwargs):
+        from transformers import AutoModel, AutoTokenizer
+        from transformers.generation import GenerationConfig
+
+        device = self._pytorch_model_config.get("device", "auto")
+        self._device = select_device(device)
+        self._device = (
+            "auto"
+            if self._device == "cuda" and self.quantization is None
+            else self._device
+        )
+
+        if "int4" in self.model_path and device == "mps":
+            logger.error(
+                "Error: running int4 model with bitsandbytes on Mac is not supported right now."
+            )
+            exit()
+
+        if self._check_tensorizer_integrity():
+            self._model, self._tokenizer = self._load_tensorizer()
+            return
+
+        if "int4" in self.model_path:
+            model = AutoModel.from_pretrained(self.model_path, trust_remote_code=True)
+        else:
+            model = AutoModel.from_pretrained(
+                self.model_path,
+                trust_remote_code=True,
+                torch_dtype=torch.float16,
+                device_map=self._device,
+            )
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+        self._model = model.eval()
+        self._tokenizer = tokenizer
+
+        # Specify hyperparameters for generation
+        self._model.generation_config = GenerationConfig.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+        self._save_tensorizer()
+
+    def _message_content_to_chat(self, content):
+        def _load_image(_url):
+            if _url.startswith("data:"):
+                logging.info("Parse url by base64 decoder.")
+                # https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
+                # e.g. f"data:image/jpeg;base64,{base64_image}"
+                _type, data = _url.split(";")
+                _, ext = _type.split("/")
+                data = data[len("base64,") :]
+                data = base64.b64decode(data.encode("utf-8"))
+                return Image.open(BytesIO(data)).convert("RGB")
+            else:
+                try:
+                    response = requests.get(_url)
+                except requests.exceptions.MissingSchema:
+                    return Image.open(_url).convert("RGB")
+                else:
+                    return Image.open(BytesIO(response.content)).convert("RGB")
+
+        if not isinstance(content, str):
+            texts = []
+            image_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_load_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            text = " ".join(texts)
+            if len(images) == 0:
+                return text, []
+            elif len(images) == 1:
+                return text, images
+            else:
+                raise RuntimeError("Only one image per message is supported")
+        return content, []
+
+    def chat(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        generate_config: Optional[PytorchGenerateConfig] = None,
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        stream = generate_config.get("stream", False) if generate_config else False
+        content, images_chat = self._message_content_to_chat(prompt)
+
+        msgs = []
+        query_to_response: List[Dict] = []
+        images_history = []
+        for h in chat_history or []:
+            role = h["role"]
+            content_h, images_tmp = self._message_content_to_chat(h["content"])
+            if images_tmp != []:
+                images_history = images_tmp
+            if len(query_to_response) == 0 and role == "user":
+                query_to_response.append({"role": "user", "content": content_h})
+            if len(query_to_response) == 1 and role == "assistant":
+                query_to_response.append({"role": "assistant", "content": content_h})
+            if len(query_to_response) == 2:
+                msgs.extend(query_to_response)
+                query_to_response = []
+        image = None
+        if len(images_chat) > 0:
+            image = images_chat[0]
+        elif len(images_history) > 0:
+            image = images_history[0]
+        msgs.append({"role": "user", "content": content})
+
+        chat = self._model.chat(
+            image=image,
+            msgs=json.dumps(msgs, ensure_ascii=True),
+            tokenizer=self._tokenizer,
+            sampling=True,
+            **generate_config
+        )
+        if stream:
+            it = self.chat_stream(chat)
+            return self._to_chat_completion_chunks(it)
+        else:
+            c = Completion(
+                id=str(uuid.uuid1()),
+                object="text_completion",
+                created=int(time.time()),
+                model=self.model_uid,
+                choices=[
+                    CompletionChoice(
+                        index=0, text=chat, finish_reason="stop", logprobs=None
+                    )
+                ],
+                usage=CompletionUsage(
+                    prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
+                ),
+            )
+            return self._to_chat_completion(c)
+
+    def chat_stream(self, chat) -> Iterator[CompletionChunk]:
+        completion_id = str(uuid.uuid1())
+        for new_text in chat:
+            completion_choice = CompletionChoice(
+                text=new_text, index=0, logprobs=None, finish_reason=None
+            )
+            chunk = CompletionChunk(
+                id=completion_id,
+                object="text_completion",
+                created=int(time.time()),
+                model=self.model_uid,
+                choices=[completion_choice],
+            )
+            completion_usage = CompletionUsage(
+                prompt_tokens=-1,
+                completion_tokens=-1,
+                total_tokens=-1,
+            )
+            chunk["usage"] = completion_usage
+            yield chunk
+
+        completion_choice = CompletionChoice(
+            text="", index=0, logprobs=None, finish_reason="stop"
+        )
+        chunk = CompletionChunk(
+            id=completion_id,
+            object="text_completion",
+            created=int(time.time()),
+            model=self.model_uid,
+            choices=[completion_choice],
+        )
+        completion_usage = CompletionUsage(
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+        )
+        chunk["usage"] = completion_usage
+        yield chunk
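Note: the new `MiniCPMV26Model.chat` accepts OpenAI-style vision content, and `_message_content_to_chat` above turns it into one text string plus at most one PIL image. The following is a minimal sketch of the message payload that code parses; the file name `example.jpg` is only a placeholder for illustration.

```python
import base64

# Placeholder image path; any local JPEG works for this sketch.
with open("example.jpg", "rb") as f:
    b64_image = base64.b64encode(f.read()).decode("utf-8")

# Content shape handled by _message_content_to_chat: a list mixing "text"
# parts and at most one "image_url" part, where the URL may be an http(s)
# URL, a local path, or a base64 data URL as shown here.
prompt = [
    {"type": "text", "text": "What is in this picture?"},
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{b64_image}"},
    },
]
# `prompt` would then be passed as the first argument to MiniCPMV26Model.chat.
```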
xinference/model/llm/sglang/core.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
 import logging
 import time
 import uuid
@@ -122,6 +123,10 @@ class SGLANGModel(LLM):
             **self._model_config,
         )

+    def stop(self):
+        logger.info("Stopping SGLang engine")
+        self._engine.shutdown()
+
     def _sanitize_model_config(
         self, model_config: Optional[SGLANGModelConfig]
     ) -> SGLANGModelConfig:
@@ -132,18 +137,20 @@ class SGLANGModel(LLM):
         model_config.setdefault("tokenizer_mode", "auto")
         model_config.setdefault("trust_remote_code", True)
         model_config.setdefault("tp_size", cuda_count)
-        # See https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py#L37
-        mem_fraction_static = model_config.pop("mem_fraction_static", None)
+        # See https://github.com/sgl-project/sglang/blob/00023d622a6d484e67ef4a0e444f708b8fc861c8/python/sglang/srt/server_args.py#L100-L109
+        mem_fraction_static = model_config.get("mem_fraction_static")
         if mem_fraction_static is None:
             tp_size = model_config.get("tp_size", cuda_count)
-            if tp_size >= 8:
-                model_config["mem_fraction_static"] = 0.80
+            if tp_size >= 16:
+                model_config["mem_fraction_static"] = 0.79
+            elif tp_size >= 8:
+                model_config["mem_fraction_static"] = 0.83
             elif tp_size >= 4:
-                model_config["mem_fraction_static"] = 0.82
-            elif tp_size >= 2:
                 model_config["mem_fraction_static"] = 0.85
+            elif tp_size >= 2:
+                model_config["mem_fraction_static"] = 0.87
             else:
-                model_config["mem_fraction_static"] = 0.90
+                model_config["mem_fraction_static"] = 0.88
         model_config.setdefault("log_level", "info")
         model_config.setdefault("attention_reduce_in_fp32", False)

@@ -249,28 +256,64 @@ class SGLANGModel(LLM):
             usage=usage,
         )

+    @classmethod
+    def _filter_sampling_params(cls, sampling_params: dict):
+        if not sampling_params.get("lora_name"):
+            sampling_params.pop("lora_name", None)
+        return sampling_params
+
+    async def _stream_generate(self, prompt: str, **sampling_params):
+        import aiohttp
+
+        sampling_params = self._filter_sampling_params(sampling_params)
+        json_data = {
+            "text": prompt,
+            "sampling_params": sampling_params,
+            "stream": True,
+        }
+        pos = 0
+
+        timeout = aiohttp.ClientTimeout(total=3 * 3600)
+        async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
+            async with session.post(
+                self._engine.generate_url, json=json_data  # type: ignore
+            ) as response:
+                async for chunk, _ in response.content.iter_chunks():
+                    chunk = chunk.decode("utf-8")
+                    if chunk and chunk.startswith("data:"):
+                        stop = "data: [DONE]\n\n"
+                        need_stop = False
+                        if chunk.endswith(stop):
+                            chunk = chunk[: -len(stop)]
+                            need_stop = True
+                        if chunk:
+                            data = json.loads(chunk[5:].strip("\n"))
+                            cur = data["text"][pos:]
+                            if cur:
+                                yield data["meta_info"], cur
+                            pos += len(cur)
+                            if need_stop:
+                                break
+
+    async def _non_stream_generate(self, prompt: str, **sampling_params) -> dict:
+        import aiohttp
+
+        sampling_params = self._filter_sampling_params(sampling_params)
+        json_data = {
+            "text": prompt,
+            "sampling_params": sampling_params,
+        }
+        async with aiohttp.ClientSession(trust_env=True) as session:
+            async with session.post(
+                self._engine.generate_url, json=json_data  # type: ignore
+            ) as response:
+                return await response.json()
+
     async def async_generate(
         self,
         prompt: str,
         generate_config: Optional[SGLANGGenerateConfig] = None,
     ) -> Union[Completion, AsyncGenerator[CompletionChunk, None]]:
-        try:
-            import sglang as sgl
-            from sglang import assistant, gen, user
-        except ImportError:
-            error_message = "Failed to import module 'sglang'"
-            installation_guide = [
-                "Please make sure 'sglang' is installed. ",
-                "You can install it by `pip install sglang[all]`\n",
-            ]
-
-            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-        @sgl.function
-        def pipeline(s, question):
-            s += user(question)
-            s += assistant(gen("answer"))
-
         sanitized_generate_config = self._sanitize_generate_config(generate_config)
         logger.debug(
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
@@ -285,25 +328,20 @@ class SGLANGModel(LLM):
         )

         request_id = str(uuid.uuid1())
-        state = pipeline.run(
-            question=prompt,
-            backend=self._engine,
-            stream=stream,
-            **sanitized_generate_config,
-        )
         if not stream:
+            state = await self._non_stream_generate(prompt, **sanitized_generate_config)
             return self._convert_state_to_completion(
                 request_id,
                 model=self.model_uid,
-                output_text=state["answer"],
-                meta_info=state.get_meta_info(name="answer"),
+                output_text=state["text"],
+                meta_info=state["meta_info"],
             )
         else:

            async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
                prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
-                async for out, meta_info in state.text_async_iter(
-                    var_name="answer", return_meta_data=True
+                async for meta_info, out in self._stream_generate(
+                    prompt, **sanitized_generate_config
                 ):
                     chunk = self._convert_state_to_completion_chunk(
                         request_id, self.model_uid, output_text=out
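Note: with this change `async_generate` no longer drives the sglang frontend DSL (`pipeline.run`); it posts directly to the SGLang server's generate endpoint and, when streaming, parses SSE-style `data: ...` chunks. A rough sketch of that parsing logic follows, run on made-up chunk strings rather than captured server output; each payload carries the full text generated so far, so only the delta is emitted.

```python
import json

# Illustrative chunks in the shape _stream_generate expects (values are made up).
chunks = [
    'data: {"text": "Hello", "meta_info": {"completion_tokens": 1}}\n\n',
    'data: {"text": "Hello world", "meta_info": {"completion_tokens": 2}}\n\n'
    "data: [DONE]\n\n",
]

pos = 0
for chunk in chunks:
    if chunk and chunk.startswith("data:"):
        stop = "data: [DONE]\n\n"
        need_stop = chunk.endswith(stop)
        if need_stop:
            chunk = chunk[: -len(stop)]  # drop the terminator before decoding
        if chunk:
            data = json.loads(chunk[5:].strip("\n"))
            delta = data["text"][pos:]  # new characters since the last chunk
            if delta:
                print(delta)  # prints "Hello", then " world"
            pos += len(delta)
        if need_stop:
            break
```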
xinference/model/llm/vllm/core.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import asyncio
 import json
 import logging
 import multiprocessing
+import os
 import time
 import uuid
 from typing import (
@@ -240,6 +242,42 @@ class VLLMModel(LLM):
         )
         self._engine = AsyncLLMEngine.from_engine_args(engine_args)

+        self._check_health_task = None
+        if hasattr(self._engine, "check_health"):
+            # vLLM introduced `check_health` since v0.4.1
+            self._check_health_task = asyncio.create_task(self._check_healthy())
+
+    def stop(self):
+        # though the vLLM engine will shutdown when deleted,
+        # but some issue e.g. GH#1682 reported
+        # when deleting, the engine exists still
+        logger.info("Stopping vLLM engine")
+        if self._check_health_task:
+            self._check_health_task.cancel()
+        if model_executor := getattr(self._engine.engine, "model_executor", None):
+            model_executor.shutdown()
+        self._engine = None
+
+    async def _check_healthy(self, interval: int = 30):
+        from vllm.engine.async_llm_engine import AsyncEngineDeadError
+
+        logger.debug("Begin to check health of vLLM")
+
+        while self._engine is not None:
+            try:
+                await self._engine.check_health()
+            except (AsyncEngineDeadError, RuntimeError):
+                logger.info("Detecting vLLM is not health, prepare to quit the process")
+                try:
+                    self.stop()
+                except:
+                    # ignore error when stop
+                    pass
+                # Just kill the process and let xinference auto-recover the model
+                os._exit(1)
+            else:
+                await asyncio.sleep(interval)
+
     def _sanitize_model_config(
         self, model_config: Optional[VLLMModelConfig]
     ) -> VLLMModelConfig:
xinference/model/video/__init__.py (new file)
@@ -0,0 +1,62 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import codecs
+import json
+import os
+from itertools import chain
+
+from .core import (
+    BUILTIN_VIDEO_MODELS,
+    MODEL_NAME_TO_REVISION,
+    MODELSCOPE_VIDEO_MODELS,
+    VIDEO_MODEL_DESCRIPTIONS,
+    VideoModelFamilyV1,
+    generate_video_description,
+    get_cache_status,
+    get_video_model_descriptions,
+)
+
+_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
+_model_spec_modelscope_json = os.path.join(
+    os.path.dirname(__file__), "model_spec_modelscope.json"
+)
+BUILTIN_VIDEO_MODELS.update(
+    dict(
+        (spec["model_name"], VideoModelFamilyV1(**spec))
+        for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+    )
+)
+for model_name, model_spec in BUILTIN_VIDEO_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
+MODELSCOPE_VIDEO_MODELS.update(
+    dict(
+        (spec["model_name"], VideoModelFamilyV1(**spec))
+        for spec in json.load(
+            codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+        )
+    )
+)
+for model_name, model_spec in MODELSCOPE_VIDEO_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
+
+# register model description
+for model_name, model_spec in chain(
+    MODELSCOPE_VIDEO_MODELS.items(), BUILTIN_VIDEO_MODELS.items()
+):
+    VIDEO_MODEL_DESCRIPTIONS.update(generate_video_description(model_spec))
+
+del _model_spec_json
+del _model_spec_modelscope_json
xinference/model/video/core.py (new file)
@@ -0,0 +1,178 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import os
+from collections import defaultdict
+from typing import Dict, List, Literal, Optional, Tuple
+
+from ...constants import XINFERENCE_CACHE_DIR
+from ..core import CacheableModelSpec, ModelDescription
+from ..utils import valid_model_revision
+from .diffusers import DiffUsersVideoModel
+
+MAX_ATTEMPTS = 3
+
+logger = logging.getLogger(__name__)
+
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+VIDEO_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+BUILTIN_VIDEO_MODELS: Dict[str, "VideoModelFamilyV1"] = {}
+MODELSCOPE_VIDEO_MODELS: Dict[str, "VideoModelFamilyV1"] = {}
+
+
+def get_video_model_descriptions():
+    import copy
+
+    return copy.deepcopy(VIDEO_MODEL_DESCRIPTIONS)
+
+
+class VideoModelFamilyV1(CacheableModelSpec):
+    model_family: str
+    model_name: str
+    model_id: str
+    model_revision: str
+    model_hub: str = "huggingface"
+    model_ability: Optional[List[str]]
+
+
+class VideoModelDescription(ModelDescription):
+    def __init__(
+        self,
+        address: Optional[str],
+        devices: Optional[List[str]],
+        model_spec: VideoModelFamilyV1,
+        model_path: Optional[str] = None,
+    ):
+        super().__init__(address, devices, model_path=model_path)
+        self._model_spec = model_spec
+
+    def to_dict(self):
+        return {
+            "model_type": "video",
+            "address": self.address,
+            "accelerators": self.devices,
+            "model_name": self._model_spec.model_name,
+            "model_family": self._model_spec.model_family,
+            "model_revision": self._model_spec.model_revision,
+            "model_ability": self._model_spec.model_ability,
+        }
+
+    def to_version_info(self):
+        if self._model_path is None:
+            is_cached = get_cache_status(self._model_spec)
+            file_location = get_cache_dir(self._model_spec)
+        else:
+            is_cached = True
+            file_location = self._model_path
+
+        return [
+            {
+                "model_version": self._model_spec.model_name,
+                "model_file_location": file_location,
+                "cache_status": is_cached,
+            }
+        ]
+
+
+def generate_video_description(
+    video_model: VideoModelFamilyV1,
+) -> Dict[str, List[Dict]]:
+    res = defaultdict(list)
+    res[video_model.model_name].extend(
+        VideoModelDescription(None, None, video_model).to_version_info()
+    )
+    return res
+
+
+def match_diffusion(
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+) -> VideoModelFamilyV1:
+    from ..utils import download_from_modelscope
+    from . import BUILTIN_VIDEO_MODELS, MODELSCOPE_VIDEO_MODELS
+
+    if download_hub == "modelscope" and model_name in MODELSCOPE_VIDEO_MODELS:
+        logger.debug(f"Video model {model_name} found in ModelScope.")
+        return MODELSCOPE_VIDEO_MODELS[model_name]
+    elif download_hub == "huggingface" and model_name in BUILTIN_VIDEO_MODELS:
+        logger.debug(f"Video model {model_name} found in Huggingface.")
+        return BUILTIN_VIDEO_MODELS[model_name]
+    elif download_from_modelscope() and model_name in MODELSCOPE_VIDEO_MODELS:
+        logger.debug(f"Video model {model_name} found in ModelScope.")
+        return MODELSCOPE_VIDEO_MODELS[model_name]
+    elif model_name in BUILTIN_VIDEO_MODELS:
+        logger.debug(f"Video model {model_name} found in Huggingface.")
+        return BUILTIN_VIDEO_MODELS[model_name]
+    else:
+        raise ValueError(
+            f"Video model {model_name} not found, available"
+            f"model list: {BUILTIN_VIDEO_MODELS.keys()}"
+        )
+
+
+def cache(model_spec: VideoModelFamilyV1):
+    from ..utils import cache
+
+    return cache(model_spec, VideoModelDescription)
+
+
+def get_cache_dir(model_spec: VideoModelFamilyV1):
+    return os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name))
+
+
+def get_cache_status(
+    model_spec: VideoModelFamilyV1,
+) -> bool:
+    cache_dir = get_cache_dir(model_spec)
+    meta_path = os.path.join(cache_dir, "__valid_download")
+
+    model_name = model_spec.model_name
+    if model_name in BUILTIN_VIDEO_MODELS and model_name in MODELSCOPE_VIDEO_MODELS:
+        hf_spec = BUILTIN_VIDEO_MODELS[model_name]
+        ms_spec = MODELSCOPE_VIDEO_MODELS[model_name]
+
+        return any(
+            [
+                valid_model_revision(meta_path, hf_spec.model_revision),
+                valid_model_revision(meta_path, ms_spec.model_revision),
+            ]
+        )
+    else:  # Usually for UT
+        return valid_model_revision(meta_path, model_spec.model_revision)
+
+
+def create_video_model_instance(
+    subpool_addr: str,
+    devices: List[str],
+    model_uid: str,
+    model_name: str,
+    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    model_path: Optional[str] = None,
+    **kwargs,
+) -> Tuple[DiffUsersVideoModel, VideoModelDescription]:
+    model_spec = match_diffusion(model_name, download_hub)
+    if not model_path:
+        model_path = cache(model_spec)
+    assert model_path is not None
+
+    model = DiffUsersVideoModel(
+        model_uid,
+        model_path,
+        model_spec,
+        **kwargs,
+    )
+    model_description = VideoModelDescription(
+        subpool_addr, devices, model_spec, model_path=model_path
+    )
+    return model, model_description
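Note: the worker creates video models through the new `create_video_model_instance` helper above. A hedged usage sketch follows; the address, device list, uid, and model name are placeholders chosen for illustration (the actual built-in names live in the bundled model_spec.json of this release), not values taken from this diff.

```python
# Hypothetical direct call to the helper added in xinference/model/video/core.py.
# In practice the xinference worker supplies these arguments when a video model
# is launched; calling it directly will download/cache the model weights.
from xinference.model.video.core import create_video_model_instance

model, description = create_video_model_instance(
    subpool_addr="127.0.0.1:34567",   # placeholder worker subpool address
    devices=["cuda:0"],               # placeholder device list
    model_uid="my-video-model",
    model_name="CogVideoX-2b",        # assumed spec name; check model_spec.json
)
print(description.to_dict())  # {"model_type": "video", "model_name": ..., ...}
```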