xinference 0.15.3__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.
Files changed (65)
  1. xinference/__init__.py +0 -4
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +29 -2
  4. xinference/client/restful/restful_client.py +10 -0
  5. xinference/constants.py +7 -3
  6. xinference/core/image_interface.py +76 -23
  7. xinference/core/model.py +158 -46
  8. xinference/core/progress_tracker.py +187 -0
  9. xinference/core/scheduler.py +10 -7
  10. xinference/core/supervisor.py +11 -0
  11. xinference/core/utils.py +9 -0
  12. xinference/core/worker.py +1 -0
  13. xinference/deploy/supervisor.py +4 -0
  14. xinference/model/__init__.py +4 -0
  15. xinference/model/audio/chattts.py +2 -1
  16. xinference/model/audio/core.py +0 -2
  17. xinference/model/audio/model_spec.json +8 -0
  18. xinference/model/audio/model_spec_modelscope.json +9 -0
  19. xinference/model/image/core.py +6 -7
  20. xinference/model/image/scheduler/__init__.py +13 -0
  21. xinference/model/image/scheduler/flux.py +533 -0
  22. xinference/model/image/sdapi.py +35 -4
  23. xinference/model/image/stable_diffusion/core.py +215 -110
  24. xinference/model/image/utils.py +39 -3
  25. xinference/model/llm/__init__.py +2 -0
  26. xinference/model/llm/llm_family.json +185 -17
  27. xinference/model/llm/llm_family_modelscope.json +124 -12
  28. xinference/model/llm/transformers/chatglm.py +104 -0
  29. xinference/model/llm/transformers/cogvlm2.py +2 -1
  30. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  31. xinference/model/llm/transformers/core.py +43 -113
  32. xinference/model/llm/transformers/deepseek_v2.py +0 -226
  33. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  34. xinference/model/llm/transformers/glm4v.py +2 -1
  35. xinference/model/llm/transformers/intern_vl.py +2 -0
  36. xinference/model/llm/transformers/internlm2.py +3 -95
  37. xinference/model/llm/transformers/minicpmv25.py +2 -0
  38. xinference/model/llm/transformers/minicpmv26.py +2 -0
  39. xinference/model/llm/transformers/omnilmm.py +2 -0
  40. xinference/model/llm/transformers/opt.py +68 -0
  41. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  42. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  43. xinference/model/llm/transformers/qwen_vl.py +2 -1
  44. xinference/model/llm/transformers/utils.py +36 -283
  45. xinference/model/llm/transformers/yi_vl.py +2 -0
  46. xinference/model/llm/utils.py +60 -16
  47. xinference/model/llm/vllm/core.py +68 -9
  48. xinference/model/llm/vllm/utils.py +0 -1
  49. xinference/model/utils.py +7 -4
  50. xinference/model/video/core.py +0 -2
  51. xinference/utils.py +2 -3
  52. xinference/web/ui/build/asset-manifest.json +3 -3
  53. xinference/web/ui/build/index.html +1 -1
  54. xinference/web/ui/build/static/js/{main.e51a356d.js → main.f7da0140.js} +3 -3
  55. xinference/web/ui/build/static/js/main.f7da0140.js.map +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +1 -0
  57. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/METADATA +38 -6
  58. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/RECORD +63 -59
  59. xinference/web/ui/build/static/js/main.e51a356d.js.map +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +0 -1
  61. /xinference/web/ui/build/static/js/{main.e51a356d.js.LICENSE.txt → main.f7da0140.js.LICENSE.txt} +0 -0
  62. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/LICENSE +0 -0
  63. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/WHEEL +0 -0
  64. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/entry_points.txt +0 -0
  65. {xinference-0.15.3.dist-info → xinference-0.16.0.dist-info}/top_level.txt +0 -0
xinference/core/progress_tracker.py ADDED
@@ -0,0 +1,187 @@
+ # Copyright 2022-2023 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import asyncio
+ import dataclasses
+ import logging
+ import os
+ import time
+ from typing import Dict, List, Optional, Tuple
+
+ import numpy as np
+ import xoscar as xo
+
+ TO_REMOVE_PROGRESS_INTERVAL = float(
+     os.getenv("XINFERENCE_REMOVE_PROGRESS_INTERVAL", 5 * 60)
+ )  # 5min
+ CHECK_PROGRESS_INTERVAL = float(
+     os.getenv("XINFERENCE_CHECK_PROGRESS_INTERVAL", 1 * 60)
+ )  # 1min
+ UPLOAD_PROGRESS_SPAN = float(
+     os.getenv("XINFERENCE_UPLOAD_PROGRESS_SPAN", 0.05)
+ )  # do not upload when the change is less than 0.05
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclasses.dataclass
+ class _ProgressInfo:
+     progress: float
+     last_updated: float
+     info: Optional[str] = None
+
+
+ class ProgressTrackerActor(xo.StatelessActor):
+     _request_id_to_progress: Dict[str, _ProgressInfo]
+
+     @classmethod
+     def default_uid(cls) -> str:
+         return "progress_tracker"
+
+     def __init__(
+         self,
+         to_remove_interval: float = TO_REMOVE_PROGRESS_INTERVAL,
+         check_interval: float = CHECK_PROGRESS_INTERVAL,
+     ):
+         super().__init__()
+
+         self._request_id_to_progress = {}
+         self._clear_finished_task = None
+         self._to_remove_interval = to_remove_interval
+         self._check_interval = check_interval
+
+     async def __post_create__(self):
+         self._clear_finished_task = asyncio.create_task(self._clear_finished())
+
+     async def __pre_destroy__(self):
+         if self._clear_finished_task:
+             self._clear_finished_task.cancel()
+
+     async def _clear_finished(self):
+         while True:
+             to_remove_request_ids = []
+             now = time.time()
+             for request_id, progress in self._request_id_to_progress.items():
+                 if abs(progress.progress - 1.0) > 1e-5:
+                     continue
+
+                 # finished
+                 if now - progress.last_updated > self._to_remove_interval:
+                     to_remove_request_ids.append(request_id)
+
+             for rid in to_remove_request_ids:
+                 del self._request_id_to_progress[rid]
+
+             if to_remove_request_ids:
+                 logger.debug(
+                     "Removed requests %s that have been finished for over %s seconds",
+                     to_remove_request_ids,
+                     self._to_remove_interval,
+                 )
+
+             await asyncio.sleep(self._check_interval)
+
+     def start(self, request_id: str):
+         self._request_id_to_progress[request_id] = _ProgressInfo(
+             progress=0.0, last_updated=time.time()
+         )
+
+     def set_progress(self, request_id: str, progress: float):
+         assert progress <= 1.0
+         info = self._request_id_to_progress[request_id]
+         info.progress = progress
+         info.last_updated = time.time()
+         logger.debug(
+             "Setting progress, request id: %s, progress: %s", request_id, progress
+         )
+
+     def get_progress(self, request_id: str) -> float:
+         return self._request_id_to_progress[request_id].progress
+
+
+ class Progressor:
+     _sub_progress_stack: List[Tuple[float, float]]
+
+     def __init__(
+         self,
+         request_id: str,
+         progress_tracker_ref: xo.ActorRefType["ProgressTrackerActor"],
+         loop: asyncio.AbstractEventLoop,
+         upload_span: float = UPLOAD_PROGRESS_SPAN,
+     ):
+         self.request_id = request_id
+         self.progress_tracker_ref = progress_tracker_ref
+         self.loop = loop
+         # upload only when progress advances by more than this span,
+         # to prevent uploading too frequently
+         self._upload_span = upload_span
+
+         self._last_report_progress = 0.0
+         self._current_progress = 0.0
+         self._sub_progress_stack = [(0.0, 1.0)]
+         self._current_sub_progress_start = 0.0
+         self._current_sub_progress_end = 1.0
+
+     async def start(self):
+         if self.request_id:
+             await self.progress_tracker_ref.start(self.request_id)
+
+     def split_stages(self, n_stage: int, stage_weight: Optional[List[float]] = None):
+         if self.request_id:
+             if stage_weight is not None:
+                 if len(stage_weight) != n_stage + 1:
+                     raise ValueError(
+                         f"stage_weight should have size {n_stage + 1}, got {len(stage_weight)}"
+                     )
+                 progresses = stage_weight
+             else:
+                 progresses = np.linspace(
+                     self._current_sub_progress_start,
+                     self._current_sub_progress_end,
+                     n_stage + 1,
+                 )
+             spans = [(progresses[i], progresses[i + 1]) for i in range(n_stage)]
+             self._sub_progress_stack.extend(spans[::-1])
+
+     def __enter__(self):
+         if self.request_id:
+             (
+                 self._current_sub_progress_start,
+                 self._current_sub_progress_end,
+             ) = self._sub_progress_stack[-1]
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if self.request_id:
+             self._sub_progress_stack.pop()
+             # force the progress of this sub-stage to 1.0 on exit,
+             # whether it finished normally or not
+             self.set_progress(1.0)
+         return False
+
+     def set_progress(self, progress: float):
+         if self.request_id:
+             self._current_progress = (
+                 self._current_sub_progress_start
+                 + (self._current_sub_progress_end - self._current_sub_progress_start)
+                 * progress
+             )
+             if (
+                 self._current_progress - self._last_report_progress >= self._upload_span
+                 or 1.0 - progress < 1e-5
+             ):
+                 set_progress = self.progress_tracker_ref.set_progress(
+                     self.request_id, self._current_progress
+                 )
+                 asyncio.run_coroutine_threadsafe(set_progress, self.loop)  # type: ignore
+                 self._last_report_progress = self._current_progress
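
Taken together, `ProgressTrackerActor` records per-request progress while `Progressor` maps nested sub-stages onto the global 0.0 to 1.0 range and throttles uploads. A minimal sketch of the intended flow, assuming a local xoscar pool; the pool address, request id `req-1`, and stage split are illustrative, not taken from the diff:

    import asyncio

    import xoscar as xo

    from xinference.core.progress_tracker import Progressor, ProgressTrackerActor


    async def main():
        pool = await xo.create_actor_pool("127.0.0.1:9999", n_process=0)
        async with pool:
            tracker = await xo.create_actor(
                ProgressTrackerActor,
                address="127.0.0.1:9999",
                uid=ProgressTrackerActor.default_uid(),
            )
            progressor = Progressor("req-1", tracker, asyncio.get_running_loop())
            await progressor.start()
            progressor.split_stages(2)  # e.g. "load" then "generate", equal weight
            with progressor:  # stage 1 maps onto [0.0, 0.5]
                progressor.set_progress(1.0)
            with progressor:  # stage 2 maps onto [0.5, 1.0]
                for step in range(10):
                    progressor.set_progress((step + 1) / 10)
            await asyncio.sleep(0.1)  # let the throttled upload land
            print(await tracker.get_progress("req-1"))  # ~1.0


    asyncio.run(main())

Note that exiting a `with` block forces that stage to 1.0, so coarse stages still advance the global figure even if the model never reports fine-grained steps.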
xinference/core/scheduler.py CHANGED
@@ -17,11 +17,12 @@ import functools
  import logging
  import uuid
  from collections import deque
- from enum import Enum
  from typing import Dict, List, Optional, Set, Tuple, Union

  import xoscar as xo

+ from .utils import AbortRequestMessage
+
  logger = logging.getLogger(__name__)

  XINFERENCE_STREAMING_DONE_FLAG = "<XINFERENCE_STREAMING_DONE>"
@@ -30,12 +31,6 @@ XINFERENCE_STREAMING_ABORT_FLAG = "<XINFERENCE_STREAMING_ABORT>"
  XINFERENCE_NON_STREAMING_ABORT_FLAG = "<XINFERENCE_NON_STREAMING_ABORT>"


- class AbortRequestMessage(Enum):
-     NOT_FOUND = 1
-     DONE = 2
-     NO_OP = 3
-
-
  class InferenceRequest:
      def __init__(
          self,
@@ -81,6 +76,10 @@ class InferenceRequest:
          self.padding_len = 0
          # Used in stream mode
          self.last_output_length = 0
+         # For tool call
+         self.tools = None
+         # Currently, for storing tool call streaming results.
+         self.outputs: List[str] = []
          # inference results,
          # it is a list type because when stream=True,
          # self.completion contains all the results in a decode round.
@@ -112,6 +111,10 @@ class InferenceRequest:
          """
          return self._prompt

+     @prompt.setter
+     def prompt(self, value: str):
+         self._prompt = value
+
      @property
      def call_ability(self):
          return self._call_ability
xinference/core/supervisor.py CHANGED
@@ -130,6 +130,7 @@ class SupervisorActor(xo.StatelessActor):
          )
          logger.info(f"Xinference supervisor {self.address} started")
          from .cache_tracker import CacheTrackerActor
+         from .progress_tracker import ProgressTrackerActor
          from .status_guard import StatusGuardActor

          self._status_guard_ref: xo.ActorRefType[  # type: ignore
@@ -142,6 +143,13 @@
          ] = await xo.create_actor(
              CacheTrackerActor, address=self.address, uid=CacheTrackerActor.default_uid()
          )
+         self._progress_tracker: xo.ActorRefType[  # type: ignore
+             "ProgressTrackerActor"
+         ] = await xo.create_actor(
+             ProgressTrackerActor,
+             address=self.address,
+             uid=ProgressTrackerActor.default_uid(),
+         )

          from .event import EventCollectorActor

@@ -1360,3 +1368,6 @@ class SupervisorActor(xo.StatelessActor):
      @staticmethod
      def record_metrics(name, op, kwargs):
          record_metrics(name, op, kwargs)
+
+     async def get_progress(self, request_id: str) -> float:
+         return await self._progress_tracker.get_progress(request_id)
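
With the tracker registered at supervisor startup, `get_progress` becomes the single entry point for polling a request. A sketch of how another actor or script might use it, assuming the supervisor exposes its usual well-known uid via `default_uid()` and that `request_id` matches the id passed to the original call:

    import xoscar as xo

    from xinference.core.supervisor import SupervisorActor


    async def poll_progress(supervisor_address: str, request_id: str) -> float:
        # resolve the running supervisor actor by its well-known uid
        supervisor_ref = await xo.actor_ref(
            address=supervisor_address, uid=SupervisorActor.default_uid()
        )
        # delegates to ProgressTrackerActor.get_progress under the hood
        return await supervisor_ref.get_progress(request_id)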
xinference/core/utils.py CHANGED
@@ -16,6 +16,7 @@ import os
  import random
  import string
  import uuid
+ from enum import Enum
  from typing import Dict, Generator, List, Optional, Tuple, Union

  import orjson
@@ -27,6 +28,12 @@ from ..constants import XINFERENCE_LOG_ARG_MAX_LENGTH
  logger = logging.getLogger(__name__)


+ class AbortRequestMessage(Enum):
+     NOT_FOUND = 1
+     DONE = 2
+     NO_OP = 3
+
+
  def truncate_log_arg(arg) -> str:
      s = str(arg)
      if len(s) > XINFERENCE_LOG_ARG_MAX_LENGTH:
@@ -51,6 +58,8 @@ def log_async(
             request_id_str = kwargs.get("request_id", "")
             if not request_id_str:
                 request_id_str = uuid.uuid1()
+            if func_name == "text_to_image":
+                kwargs["request_id"] = request_id_str
             request_id_str = f"[request {request_id_str}]"
             formatted_args = ",".join(map(truncate_log_arg, args))
             formatted_kwargs = ",".join(
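
Moving `AbortRequestMessage` into `core/utils.py` lets the scheduler and the RESTful layer share it; the enum names are what an abort ultimately reports. A hedged sketch against the RESTful client (`abort_request` exists on `RESTfulClient`; the model uid and request id are placeholders, and the exact response payload shape is an assumption, so it is only printed here):

    from xinference.client import RESTfulClient
    from xinference.core.utils import AbortRequestMessage

    client = RESTfulClient("http://127.0.0.1:9997")
    # request ids come from the `request_id` kwarg on generate/chat calls
    result = client.abort_request("my-model-uid", "req-1")
    # the reported message is assumed to be one of the enum names:
    # NOT_FOUND, DONE, or NO_OP
    print(result, [m.name for m in AbortRequestMessage])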
xinference/core/worker.py CHANGED
@@ -885,6 +885,7 @@ class WorkerActor(xo.StatelessActor):
             ModelActor,
             address=subpool_address,
             uid=model_uid,
+            supervisor_address=self._supervisor_address,
             worker_address=self.address,
             model=model,
             model_description=model_description,
xinference/deploy/supervisor.py CHANGED
@@ -31,6 +31,10 @@ from .utils import health_check

  logger = logging.getLogger(__name__)

+ from ..model import _install as install_model
+
+ install_model()
+

  async def _start_supervisor(address: str, logging_conf: Optional[Dict] = None):
      logging.config.dictConfig(logging_conf)  # type: ignore
xinference/model/__init__.py CHANGED
@@ -29,3 +29,7 @@ def _install():
      image_install()
      rerank_install()
      video_install()
+
+
+ _install()
+ del _install
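
Running `_install()` at import time means that merely importing `xinference.model` registers every builtin model family, which is what the `deploy/supervisor.py` change above relies on. A small sketch of the side effect, assuming `BUILTIN_LLM_FAMILIES` (a registry list that exists in this codebase) is populated by the install hooks:

    # the import itself triggers _install(), registering all builtin families
    import xinference.model  # noqa: F401

    from xinference.model.llm import BUILTIN_LLM_FAMILIES

    print(f"{len(BUILTIN_LLM_FAMILIES)} builtin LLM families registered")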
xinference/model/audio/chattts.py CHANGED
@@ -53,7 +53,8 @@ class ChatTTSModel:
          torch._dynamo.config.suppress_errors = True
          torch.set_float32_matmul_precision("high")
          self._model = ChatTTS.Chat()
-         self._model.load(source="custom", custom_path=self._model_path, compile=True)
+         logger.info("Load ChatTTS model with kwargs: %s", self._kwargs)
+         self._model.load(source="custom", custom_path=self._model_path, **self._kwargs)

      def speech(
          self,
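
Since `load()` now receives the launch-time kwargs verbatim instead of a hardcoded `compile=True`, options like `compile` become user-controllable. A hedged sketch with the RESTful client; the extra kwargs are forwarded as-is, so whether a given name is valid depends on the installed ChatTTS version, not on xinference:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="ChatTTS",
        model_type="audio",
        # assumed to be accepted by ChatTTS.Chat.load(); skips torch.compile
        compile=False,
    )
    model = client.get_model(model_uid)
    speech_bytes = model.speech("Hello from Xinference!")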
xinference/model/audio/core.py CHANGED
@@ -25,8 +25,6 @@ from .fish_speech import FishSpeechModel
  from .funasr import FunASRModel
  from .whisper import WhisperModel

- MAX_ATTEMPTS = 3
-
  logger = logging.getLogger(__name__)

  # Used to check whether the model is cached.
xinference/model/audio/model_spec.json CHANGED
@@ -71,6 +71,14 @@
      "model_ability": "audio-to-text",
      "multilingual": true
    },
+   {
+     "model_name": "whisper-large-v3-turbo",
+     "model_family": "whisper",
+     "model_id": "openai/whisper-large-v3-turbo",
+     "model_revision": "41f01f3fe87f28c78e2fbf8b568835947dd65ed9",
+     "model_ability": "audio-to-text",
+     "multilingual": true
+   },
    {
      "model_name": "Belle-distilwhisper-large-v2-zh",
      "model_family": "whisper",
xinference/model/audio/model_spec_modelscope.json CHANGED
@@ -8,6 +8,15 @@
      "model_ability": "audio-to-text",
      "multilingual": true
    },
+   {
+     "model_name": "whisper-large-v3-turbo",
+     "model_family": "whisper",
+     "model_hub": "modelscope",
+     "model_id": "AI-ModelScope/whisper-large-v3-turbo",
+     "model_revision": "master",
+     "model_ability": "audio-to-text",
+     "multilingual": true
+   },
    {
      "model_name": "SenseVoiceSmall",
      "model_family": "funasr",
xinference/model/image/core.py CHANGED
@@ -23,8 +23,6 @@ from ..core import CacheableModelSpec, ModelDescription
  from ..utils import valid_model_revision
  from .stable_diffusion.core import DiffusionModel

- MAX_ATTEMPTS = 3
-
  logger = logging.getLogger(__name__)

  MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
@@ -210,18 +208,19 @@
          for name in controlnet:
              for cn_model_spec in model_spec.controlnet:
                  if cn_model_spec.model_name == name:
-                     if not model_path:
-                         model_path = cache(cn_model_spec)
-                     controlnet_model_paths.append(model_path)
+                     controlnet_model_path = cache(cn_model_spec)
+                     controlnet_model_paths.append(controlnet_model_path)
                      break
              else:
                  raise ValueError(
                      f"controlnet `{name}` is not supported for model `{model_name}`."
                  )
          if len(controlnet_model_paths) == 1:
-             kwargs["controlnet"] = controlnet_model_paths[0]
+             kwargs["controlnet"] = (controlnet[0], controlnet_model_paths[0])
          else:
-             kwargs["controlnet"] = [
-                 (n, path) for n, path in zip(controlnet, controlnet_model_paths)
-             ]
      if not model_path:
          model_path = cache(model_spec)
      if peft_model_config is not None:
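
The change above also fixes the old code's accidental reuse of `model_path` for controlnet caching, and downstream consumers now receive controlnet entries as `(name, path)` pairs rather than bare paths, so the model can tell which controlnet each path belongs to. Illustrative shapes only; the names and paths are placeholders:

    # one controlnet selected: a single (name, path) tuple
    kwargs["controlnet"] = ("canny", "/cache/controlnet-canny")

    # several selected: a list of (name, path) tuples, order preserved
    kwargs["controlnet"] = [
        ("canny", "/cache/controlnet-canny"),
        ("depth", "/cache/controlnet-depth"),
    ]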
xinference/model/image/scheduler/__init__.py ADDED
@@ -0,0 +1,13 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.