xinference 0.14.4.post1__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_compat.py +51 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +209 -40
- xinference/client/restful/restful_client.py +7 -26
- xinference/conftest.py +1 -1
- xinference/constants.py +5 -0
- xinference/core/cache_tracker.py +1 -1
- xinference/core/chat_interface.py +8 -14
- xinference/core/event.py +1 -1
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +110 -31
- xinference/core/scheduler.py +37 -37
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +17 -10
- xinference/core/utils.py +80 -22
- xinference/core/worker.py +17 -16
- xinference/deploy/cmdline.py +8 -16
- xinference/deploy/local.py +1 -1
- xinference/deploy/supervisor.py +1 -1
- xinference/deploy/utils.py +1 -1
- xinference/deploy/worker.py +1 -1
- xinference/model/audio/cosyvoice.py +86 -41
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/embedding/core.py +52 -31
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +164 -19
- xinference/model/llm/__init__.py +29 -11
- xinference/model/llm/llama_cpp/core.py +16 -33
- xinference/model/llm/llm_family.json +1011 -1296
- xinference/model/llm/llm_family.py +34 -53
- xinference/model/llm/llm_family_csghub.json +18 -35
- xinference/model/llm/llm_family_modelscope.json +981 -1122
- xinference/model/llm/lmdeploy/core.py +56 -88
- xinference/model/llm/mlx/core.py +46 -69
- xinference/model/llm/sglang/core.py +36 -18
- xinference/model/llm/transformers/chatglm.py +168 -306
- xinference/model/llm/transformers/cogvlm2.py +36 -63
- xinference/model/llm/transformers/cogvlm2_video.py +33 -223
- xinference/model/llm/transformers/core.py +55 -50
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/deepseek_vl.py +53 -96
- xinference/model/llm/transformers/glm4v.py +55 -111
- xinference/model/llm/transformers/intern_vl.py +39 -70
- xinference/model/llm/transformers/internlm2.py +32 -54
- xinference/model/llm/transformers/minicpmv25.py +22 -55
- xinference/model/llm/transformers/minicpmv26.py +158 -68
- xinference/model/llm/transformers/omnilmm.py +5 -28
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +234 -0
- xinference/model/llm/transformers/qwen_vl.py +34 -86
- xinference/model/llm/transformers/utils.py +32 -38
- xinference/model/llm/transformers/yi_vl.py +32 -72
- xinference/model/llm/utils.py +280 -554
- xinference/model/llm/vllm/core.py +161 -100
- xinference/model/rerank/core.py +41 -8
- xinference/model/rerank/model_spec.json +7 -0
- xinference/model/rerank/model_spec_modelscope.json +7 -1
- xinference/model/utils.py +1 -31
- xinference/thirdparty/cosyvoice/bin/export_jit.py +64 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.py +8 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +38 -22
- xinference/thirdparty/cosyvoice/cli/model.py +139 -26
- xinference/thirdparty/cosyvoice/flow/flow.py +15 -9
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +20 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +8 -4
- xinference/thirdparty/cosyvoice/llm/llm.py +14 -13
- xinference/thirdparty/cosyvoice/transformer/attention.py +7 -3
- xinference/thirdparty/cosyvoice/transformer/decoder.py +1 -1
- xinference/thirdparty/cosyvoice/transformer/embedding.py +4 -3
- xinference/thirdparty/cosyvoice/transformer/encoder.py +4 -2
- xinference/thirdparty/cosyvoice/utils/common.py +36 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +16 -0
- xinference/thirdparty/deepseek_vl/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/app.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/chart.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/mirror.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/pipeline.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/puzzle.png +0 -0
- xinference/thirdparty/deepseek_vl/serve/examples/rap.jpeg +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/base.yaml +87 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +33 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/r_8_alpha_16.yaml +4 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +83 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/text-data.proto +24 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/README.md +27 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/.gitignore +114 -0
- xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/README.md +36 -0
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/css/style.css +161 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/html/footer.html +11 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/js/animate.js +69 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/README.md +59 -0
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/thirdparty/matcha/VERSION +1 -0
- xinference/thirdparty/matcha/hifigan/LICENSE +21 -0
- xinference/thirdparty/matcha/hifigan/README.md +101 -0
- xinference/thirdparty/omnilmm/LICENSE +201 -0
- xinference/thirdparty/whisper/__init__.py +156 -0
- xinference/thirdparty/whisper/__main__.py +3 -0
- xinference/thirdparty/whisper/assets/gpt2.tiktoken +50256 -0
- xinference/thirdparty/whisper/assets/mel_filters.npz +0 -0
- xinference/thirdparty/whisper/assets/multilingual.tiktoken +50257 -0
- xinference/thirdparty/whisper/audio.py +157 -0
- xinference/thirdparty/whisper/decoding.py +826 -0
- xinference/thirdparty/whisper/model.py +314 -0
- xinference/thirdparty/whisper/normalizers/__init__.py +2 -0
- xinference/thirdparty/whisper/normalizers/basic.py +76 -0
- xinference/thirdparty/whisper/normalizers/english.json +1741 -0
- xinference/thirdparty/whisper/normalizers/english.py +550 -0
- xinference/thirdparty/whisper/timing.py +386 -0
- xinference/thirdparty/whisper/tokenizer.py +395 -0
- xinference/thirdparty/whisper/transcribe.py +605 -0
- xinference/thirdparty/whisper/triton_ops.py +109 -0
- xinference/thirdparty/whisper/utils.py +316 -0
- xinference/thirdparty/whisper/version.py +1 -0
- xinference/types.py +14 -53
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.4bafd904.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/main.754740c0.js +3 -0
- xinference/web/ui/build/static/js/{main.eb13fe95.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +2 -0
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/10c69dc7a296779fcffedeff9393d832dfcb0013c36824adf623d3c518b801ff.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/77d50223f3e734d4485cca538cb098a8c3a7a0a1a9f01f58cdda3af42fe1adf5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a56d5a642409a84988891089c98ca28ad0546432dfbae8aaa51bc5a280e1cdd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9ff696a3e3471f01b46c63d18af32e491eb5dc0e43cb30202c96871466df57f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +37 -0
- xinference/web/ui/node_modules/a-sync-waterfall/package.json +21 -0
- xinference/web/ui/node_modules/nunjucks/node_modules/commander/package.json +48 -0
- xinference/web/ui/node_modules/nunjucks/package.json +112 -0
- xinference/web/ui/package-lock.json +38 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/METADATA +16 -10
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/RECORD +179 -127
- xinference/model/llm/transformers/llama_2.py +0 -108
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.4bafd904.css.map +0 -1
- xinference/web/ui/build/static/js/main.eb13fe95.js +0 -3
- xinference/web/ui/build/static/js/main.eb13fe95.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +0 -1
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.4.post1.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/cogvlm2.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, Iterator, List, Optional, Tuple, Union
@@ -21,17 +20,14 @@ import torch

 from ....core.scheduler import InferenceRequest
 from ....model.utils import select_device
-from ....types import (
-    ChatCompletion,
-    ChatCompletionChunk,
-    ChatCompletionMessage,
-    Completion,
-    CompletionChoice,
-    CompletionChunk,
-    CompletionUsage,
-)
+from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import
+from ..utils import (
+    _decode_image,
+    generate_chat_completion,
+    generate_completion_chunk,
+    parse_messages,
+)
 from .core import PytorchChatModel, PytorchGenerateConfig
 from .utils import get_max_src_len

@@ -139,9 +135,7 @@ class CogVLM2Model(PytorchChatModel):
         )
         return content, None

-    def _history_content_to_cogvlm2(
-        self, system_prompt: str, chat_history: List[ChatCompletionMessage]
-    ):
+    def _history_content_to_cogvlm2(self, system_prompt: str, chat_history: List[Dict]):
         query = system_prompt
         history: List[Tuple] = []
         pixel_values = None
@@ -163,7 +157,7 @@ class CogVLM2Model(PytorchChatModel):
         self,
         prompt: Union[str, List[Dict]],
         system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        chat_history: Optional[List[Dict]] = None,
     ):
         content, image = self._message_content_to_cogvlm2(prompt)

@@ -184,12 +178,12 @@ class CogVLM2Model(PytorchChatModel):

     def chat(
         self,
-
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        messages: List[Dict],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-        system_prompt =
+        system_prompt = ""
+        if messages[0]["role"] == "system":
+            system_prompt = messages[0]["content"]
         stream = generate_config.get("stream", False) if generate_config else False

         sanitized_config = {
@@ -199,6 +193,7 @@ class CogVLM2Model(PytorchChatModel):
             else 512,
         }

+        prompt, _, chat_history = parse_messages(messages)
         query, image, history = self.get_query_and_history(
             prompt, system_prompt=system_prompt, chat_history=chat_history
         )
@@ -236,21 +231,7 @@ class CogVLM2Model(PytorchChatModel):
         response = self._tokenizer.decode(outputs[0])
         response = response.split("<|end_of_text|>")[0]

-
-            id=str(uuid.uuid1()),
-            object="text_completion",
-            created=int(time.time()),
-            model=self.model_uid,
-            choices=[
-                CompletionChoice(
-                    index=0, text=response, finish_reason="stop", logprobs=None
-                )
-            ],
-            usage=CompletionUsage(
-                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-            ),
-        )
-        return self._to_chat_completion(chunk)
+        return generate_chat_completion(self.model_uid, response)

     def _streaming_chat_response(
         self, inputs: Dict, config: Dict
@@ -277,36 +258,26 @@ class CogVLM2Model(PytorchChatModel):

         completion_id = str(uuid.uuid1())
         for new_text in streamer:
-
-
-
-
-
-
-
-
-                    )
-                ],
-                usage=CompletionUsage(
-                    prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-                ),
+            yield generate_completion_chunk(
+                chunk_text=new_text,
+                finish_reason=None,
+                chunk_id=completion_id,
+                model_uid=self.model_uid,
+                prompt_tokens=-1,
+                completion_tokens=-1,
+                total_tokens=-1,
             )
-
-
-
-
-
-
-
-
-
-
-            choices=[completion_choice],
-            usage=CompletionUsage(
-                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-            ),
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+            has_choice=True,
+            has_content=False,
         )
-        yield chunk

     @staticmethod
     def build_position_ids(x, attention_mask=None):
@@ -341,7 +312,9 @@ class CogVLM2Model(PytorchChatModel):
     def get_dtype(self):
         return self._torch_type

-    def _get_full_prompt(self, prompt, system_prompt, chat_history, tools):
+    def _get_full_prompt(self, messages: List[Dict], tools):
+        prompt, system_prompt, chat_history = parse_messages(messages)
+        system_prompt = system_prompt or ""
         query, image, history = self.get_query_and_history(
             prompt, system_prompt=system_prompt, chat_history=chat_history
         )
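The hunks above migrate `CogVLM2Model.chat` from separate `prompt` / `system_prompt` / `chat_history` arguments to a single OpenAI-style `messages` list, with `parse_messages` recovering the prompt, system prompt, and history that the rest of the model code still consumes. A minimal sketch of that splitting, assuming the usual role/content message shape; `split_messages` below is an illustrative stand-in, not the library's `parse_messages`:

```python
from typing import Dict, List, Optional, Tuple


def split_messages(messages: List[Dict]) -> Tuple[str, Optional[str], List[Dict]]:
    """Return (latest user prompt, system prompt, earlier history)."""
    system_prompt = None
    if messages and messages[0]["role"] == "system":
        # Mirrors the diff: a leading system message becomes the system prompt.
        system_prompt = messages[0]["content"]
        messages = messages[1:]
    prompt = messages[-1]["content"]  # the newest user turn
    history = messages[:-1]  # everything before it
    return prompt, system_prompt, history


messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Describe this image."},
]
print(split_messages(messages))
# ('Describe this image.', 'You are a helpful assistant.', [])
```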
xinference/model/llm/transformers/cogvlm2_video.py

@@ -12,28 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, Iterator, List, Optional, Tuple, Union

 import torch

-from ....core.scheduler import InferenceRequest
 from ....model.utils import select_device
-from ....types import (
-    ChatCompletion,
-    ChatCompletionChunk,
-    ChatCompletionMessage,
-    Completion,
-    CompletionChoice,
-    CompletionChunk,
-    CompletionUsage,
-)
+from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import
+from ..utils import (
+    _decode_image,
+    generate_chat_completion,
+    generate_completion_chunk,
+    parse_messages,
+)
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len

 logger = logging.getLogger(__name__)

@@ -170,9 +164,7 @@ class CogVLM2VideoModel(PytorchChatModel):
             return text, images, video
         return content, [], None

-    def _history_content_to_cogvlm2(
-        self, system_prompt: str, chat_history: List[ChatCompletionMessage]
-    ):
+    def _history_content_to_cogvlm2(self, system_prompt: str, chat_history: List[Dict]):
         query = system_prompt
         history: List[Tuple] = []
         pixel_values = None
@@ -202,7 +194,7 @@ class CogVLM2VideoModel(PytorchChatModel):
         self,
         prompt: Union[str, List[Dict]],
         system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        chat_history: Optional[List[Dict]] = None,
     ):
         content, image, video = self._message_content_to_cogvlm2(prompt)

@@ -237,12 +229,12 @@ class CogVLM2VideoModel(PytorchChatModel):

     def chat(
         self,
-
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        messages: List[Dict],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-        system_prompt =
+        system_prompt = ""
+        if messages[0]["role"] == "system":
+            system_prompt = messages[0]["content"]
         stream = generate_config.get("stream", False) if generate_config else False

         sanitized_config = {
@@ -252,6 +244,7 @@ class CogVLM2VideoModel(PytorchChatModel):
             else 512,
         }

+        prompt, _, chat_history = parse_messages(messages)
         query, image, video, history = self.get_query_and_history(
             prompt, system_prompt=system_prompt, chat_history=chat_history
         )
@@ -292,21 +285,7 @@ class CogVLM2VideoModel(PytorchChatModel):
         response = self._tokenizer.decode(outputs[0])
         response = response.split("<|end_of_text|>")[0]

-
-            id=str(uuid.uuid1()),
-            object="text_completion",
-            created=int(time.time()),
-            model=self.model_uid,
-            choices=[
-                CompletionChoice(
-                    index=0, text=response, finish_reason="stop", logprobs=None
-                )
-            ],
-            usage=CompletionUsage(
-                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-            ),
-        )
-        return self._to_chat_completion(chunk)
+        return generate_chat_completion(self.model_uid, response)

     def _streaming_chat_response(
         self, inputs: Dict, config: Dict
@@ -333,192 +312,23 @@ class CogVLM2VideoModel(PytorchChatModel):

         completion_id = str(uuid.uuid1())
         for new_text in streamer:
-
-
-
-
-
-
-
-
-                    )
-                ],
-                usage=CompletionUsage(
-                    prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-                ),
+            yield generate_completion_chunk(
+                chunk_text=new_text,
+                finish_reason=None,
+                chunk_id=completion_id,
+                model_uid=self.model_uid,
+                prompt_tokens=-1,
+                completion_tokens=-1,
+                total_tokens=-1,
             )
-
-
-
-
-
-
-
-
-
-
-            choices=[completion_choice],
-            usage=CompletionUsage(
-                prompt_tokens=-1, completion_tokens=-1, total_tokens=-1
-            ),
-        )
-        yield chunk
-
-    @staticmethod
-    def build_position_ids(x, attention_mask=None):
-        """
-        Copied from https://huggingface.co/THUDM/cogvlm2-llama3-chinese-chat-19B-int4/blob/main/modeling_cogvlm.py
-        """
-        # Fix: refer to the official open-source code
-        if attention_mask is not None:
-            tmp = x.clone()
-            tmp[~(attention_mask.bool())] = -1
-        else:
-            tmp = x.clone()
-        # image boi eoi token as LANGUAGE_TOKEN_TYPE
-        is_boi_eoi = torch.zeros_like(x, dtype=torch.bool)
-        is_boi_eoi[:, 1:] |= (tmp[:, 1:] == VISION_TOKEN_TYPE) & (
-            tmp[:, :-1] == LANGUAGE_TOKEN_TYPE
-        )
-        is_boi_eoi[:, 0] |= tmp[:, 0] == VISION_TOKEN_TYPE
-        is_boi_eoi[:, :-1] |= (tmp[:, :-1] == VISION_TOKEN_TYPE) & (
-            tmp[:, 1:] == LANGUAGE_TOKEN_TYPE
+        yield generate_completion_chunk(
+            chunk_text=None,
+            finish_reason="stop",
+            chunk_id=completion_id,
+            model_uid=self.model_uid,
+            prompt_tokens=-1,
+            completion_tokens=-1,
+            total_tokens=-1,
+            has_choice=True,
+            has_content=False,
         )
-        is_boi_eoi[:, -1] |= tmp[:, -1] == VISION_TOKEN_TYPE
-        tmp[is_boi_eoi] = LANGUAGE_TOKEN_TYPE
-        # final position ids
-        y = torch.zeros_like(x, dtype=torch.long)
-        y[:, 1:] = (tmp[:, 1:] == LANGUAGE_TOKEN_TYPE) | (
-            (tmp[:, 1:] == VISION_TOKEN_TYPE) & (tmp[:, :-1] == LANGUAGE_TOKEN_TYPE)
-        )
-        y = y.cumsum(dim=-1)
-        return y
-
-    def get_dtype(self):
-        return self._torch_type
-
-    def _get_full_prompt(self, prompt, system_prompt, chat_history, tools):
-        query, image, video, history = self.get_query_and_history(
-            prompt, system_prompt=system_prompt, chat_history=chat_history
-        )
-
-        if video:
-            image = [video]
-
-        input_by_model: dict = self._model.build_conversation_input_ids(  # type: ignore
-            self._tokenizer,
-            query=query,
-            history=history,
-            images=image,
-            template_version="chat",
-        )
-        return {
-            "input_ids": input_by_model["input_ids"],  # seq_len
-            "token_type_ids": input_by_model["token_type_ids"],  # seq_len
-            "attention_mask": input_by_model["attention_mask"],  # seq_len
-            "images": input_by_model["images"],
-        }
-
-    def prepare_sanitize_generate_config(self, req: InferenceRequest):
-        """
-        See https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B/blob/main/generation_config.json
-        """
-        raw_config = req.inference_kwargs.get("raw_params", {})
-        temperature = raw_config.get("temperature", None)
-        if temperature is None:
-            raw_config["temperature"] = 0.6
-        top_p = raw_config.get("top_p", None)
-        if top_p is None:
-            raw_config["top_p"] = 0.9
-        return raw_config
-
-    def build_prefill_kwargs(self, prompts: List, req_list: List[InferenceRequest]):
-        context_len = self.get_context_len()
-        assert isinstance(prompts[0], dict)
-        images = []
-        max_length = float("-inf")
-        for i, feature in enumerate(prompts):
-            req = req_list[i]
-            if "images" in feature:
-                images.append(feature.pop("images", None))
-            max_src_len = get_max_src_len(context_len, req)
-            input_ids = feature["input_ids"][-max_src_len:]
-            req.prompt_tokens = input_ids.tolist()
-            feature["input_ids"] = input_ids
-            feature["token_type_ids"] = feature["token_type_ids"][-max_src_len:]
-            feature["attention_mask"] = feature["attention_mask"][-max_src_len:]
-            req.extra_kwargs["attention_mask_seq_len"] = feature[
-                "attention_mask"
-            ].shape[0]
-            max_length = max(len(input_ids), max_length)
-
-        def pad_to_max_length_internal(feature, max_len, idx):
-            padding_length = max_len - len(feature["input_ids"])
-            req_list[idx].padding_len = padding_length
-            feature["input_ids"] = torch.cat(
-                [torch.full((padding_length,), 0), feature["input_ids"]]
-            )
-            feature["token_type_ids"] = torch.cat(
-                [
-                    torch.zeros(padding_length, dtype=torch.long),
-                    feature["token_type_ids"],
-                ]
-            )
-            feature["attention_mask"] = torch.cat(
-                [
-                    torch.zeros(padding_length, dtype=torch.long),
-                    feature["attention_mask"],
-                ]
-            )
-            return feature
-
-        features = [
-            pad_to_max_length_internal(feature, max_length, i)
-            for i, feature in enumerate(prompts)
-        ]
-        batch = {
-            key: torch.stack([feature[key] for feature in features])
-            for key in features[0].keys()
-        }
-
-        position_ids = self.build_position_ids(batch["token_type_ids"])
-        batch["position_ids"] = position_ids
-
-        for i in range(len(prompts)):
-            req = req_list[i]
-            req.extra_kwargs["max_position_id"] = position_ids[i : i + 1, -1].item()
-
-        if images:
-            batch["images"] = images
-
-        batch = recur_move_to(
-            batch, self._device, lambda x: isinstance(x, torch.Tensor)
-        )
-        dtype = self.get_dtype()
-        if dtype:
-            batch = recur_move_to(
-                batch,
-                dtype,
-                lambda x: isinstance(x, torch.Tensor) and torch.is_floating_point(x),
-            )
-        return batch
-
-    def build_decode_token_type_ids(
-        self, batch_size: int, seq_length: int, reqs: List[InferenceRequest]
-    ):
-        token_type_ids = torch.full(
-            (batch_size, 1), fill_value=1, dtype=torch.long, device=self._device
-        )
-        return token_type_ids
-
-    def build_decode_position_ids(
-        self, batch_size: int, seq_length: int, reqs: List[InferenceRequest]
-    ):
-        tmp = []
-        for r in reqs:
-            r.extra_kwargs["max_position_id"] += 1
-            tmp.append(r.extra_kwargs["max_position_id"])
-        position_ids = torch.as_tensor(
-            tmp, device=self._device, dtype=torch.long
-        ).unsqueeze(1)
-        return position_ids
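Both CogVLM2 classes now stream through `generate_completion_chunk(...)` instead of hand-assembling `CompletionChunk` / `CompletionUsage` objects, and the terminating chunk is emitted with `has_choice=True, has_content=False`. A rough sketch of a helper with the same keyword interface, inferred only from the call sites in these hunks; the real helper lives in `xinference/model/llm/utils.py` and may differ in detail:

```python
import time
import uuid
from typing import Optional


def make_completion_chunk(
    chunk_text: Optional[str],
    finish_reason: Optional[str],
    chunk_id: str,
    model_uid: str,
    prompt_tokens: int = -1,
    completion_tokens: int = -1,
    total_tokens: int = -1,
    has_choice: bool = True,
    has_content: bool = True,
) -> dict:
    # One OpenAI-style completion chunk; -1 token counts mean "unknown".
    choice = {"index": 0, "logprobs": None, "finish_reason": finish_reason}
    if has_content:
        choice["text"] = chunk_text or ""
    return {
        "id": chunk_id,
        "object": "text_completion",
        "created": int(time.time()),
        "model": model_uid,
        "choices": [choice] if has_choice else [],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
        },
    }


# The terminating chunk from the diff: a choice carrying finish_reason="stop" but no text.
stop_chunk = make_completion_chunk(
    None, "stop", str(uuid.uuid1()), "cogvlm2-video", has_content=False
)
```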
xinference/model/llm/transformers/core.py

@@ -16,7 +16,7 @@ import json
 import logging
 import os
 from functools import lru_cache
-from typing import Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union

 import torch

@@ -29,7 +29,6 @@ from ....device_utils import (
 from ....types import (
     ChatCompletion,
     ChatCompletionChunk,
-    ChatCompletionMessage,
     Completion,
     CompletionChoice,
     CompletionChunk,
@@ -52,8 +51,6 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "chatglm3-128k",
     "glm4-chat",
     "glm4-chat-1m",
-    "llama-2",
-    "llama-2-chat",
     "internlm2-chat",
     "internlm2.5-chat",
     "qwen-vl-chat",
@@ -67,6 +64,13 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "MiniCPM-Llama3-V-2_5",
     "MiniCPM-V-2.6",
     "glm-4v",
+    "qwen2-vl-instruct",
+    "qwen2-audio",
+    "qwen2-audio-instruct",
+    "deepseek-v2",
+    "deepseek-v2-chat",
+    "deepseek-v2.5",
+    "deepseek-v2-chat-0628",
 ]


@@ -615,12 +619,17 @@ class PytorchModel(LLM):
                 r.error_msg = str(e)

     def get_builtin_stop_token_ids(self) -> Tuple:
-
-
-
-
-
-
+        from ..utils import get_stop_token_ids_from_config_file
+
+        stop_token_ids = get_stop_token_ids_from_config_file(self.model_path)
+        if stop_token_ids is not None:
+            return tuple(stop_token_ids)
+        else:
+            return (
+                tuple(self.model_family.stop_token_ids)
+                if self.model_family.stop_token_ids
+                else tuple()
+            )

     def handle_batch_inference_results(self, req_list: List[InferenceRequest]):
         for req in req_list:
@@ -693,20 +702,13 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
         generate_config: Optional[PytorchGenerateConfig],
     ) -> PytorchGenerateConfig:
         generate_config = super()._sanitize_generate_config(generate_config)
-        if (
-
-            and self.model_family.prompt_style
-            and self.model_family.prompt_style.stop
-        ):
-            generate_config["stop"] = self.model_family.prompt_style.stop.copy()
+        if (not generate_config.get("stop")) and self.model_family.stop is not None:
+            generate_config["stop"] = self.model_family.stop.copy()
         if (
             generate_config.get("stop_token_ids", None) is None
-            and self.model_family.prompt_style
-            and self.model_family.prompt_style.stop_token_ids
+            and self.model_family.stop_token_ids is not None
         ):
-            generate_config[
-                "stop_token_ids"
-            ] = self.model_family.prompt_style.stop_token_ids.copy()
+            generate_config["stop_token_ids"] = self.model_family.stop_token_ids.copy()

         return generate_config

@@ -725,26 +727,23 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):

     def chat(
         self,
-
-        system_prompt: Optional[str] = None,
-        chat_history: Optional[List[ChatCompletionMessage]] = None,
+        messages: List[Dict],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         tools = generate_config.pop("tools", []) if generate_config else None
-        full_prompt = self._get_full_prompt(prompt, system_prompt, chat_history, tools)
-
-        generate_config = self._sanitize_generate_config(generate_config)
-        # TODO(codingl2k1): qwen hacky to set stop for function call.
         model_family = self.model_family.model_family or self.model_family.model_name
+        full_context_kwargs = {}
         if tools and model_family in QWEN_TOOL_CALL_FAMILY:
-
-
-
-
-
-
-
-
+            full_context_kwargs["tools"] = tools
+        assert self.model_family.chat_template is not None
+        full_prompt = self.get_full_context(
+            messages,
+            self.model_family.chat_template,
+            tokenizer=self._tokenizer,
+            **full_context_kwargs,
+        )
+
+        generate_config = self._sanitize_generate_config(generate_config)

         stream = generate_config.get("stream", False)
         if stream:
@@ -755,22 +754,16 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
         c = self.generate(full_prompt, generate_config)
         assert not isinstance(c, Iterator)
         if tools:
-            return self._tool_calls_completion(
-                self.model_family, self.model_uid, c, tools
-            )
+            return self._tool_calls_completion(self.model_family, self.model_uid, c)
         return self._to_chat_completion(c)

     def load(self):
         super().load()

-    def _get_full_prompt(self, prompt, system_prompt, chat_history, tools):
-        assert self.model_family.
-
-
-        prompt_style.system_prompt = system_prompt
-        chat_history = chat_history or []
-        full_prompt = ChatModelMixin.get_prompt(
-            prompt, chat_history, prompt_style, tools=tools
+    def _get_full_prompt(self, messages: List[Dict], tools):
+        assert self.model_family.chat_template is not None
+        full_prompt = self.get_full_context(
+            messages, self.model_family.chat_template, tokenizer=self._tokenizer
         )
         return full_prompt

@@ -779,9 +772,7 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
         for r in req_list:
             try:
                 if not r.stopped and r.is_prefill:
-                    r.full_prompt = self._get_full_prompt(
-                        r.prompt, r.system_prompt, r.chat_history, None
-                    )
+                    r.full_prompt = self._get_full_prompt(r.prompt, None)
             except Exception as e:
                 logger.exception(f"prepare inference error with {e}")
                 r.stopped = True
@@ -790,6 +781,20 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
     def handle_batch_inference_results(self, req_list: List[InferenceRequest]):
         for req in req_list:
             if req.error_msg is None and req.completion:
+                # The `generate` function can be called for some chat models.
+                # So that we cannot convert completion chunk to chat completion chunk.
+                if req.call_ability == "generate":
+                    results = []
+                    for c in req.completion:
+                        if c == "<bos_stream>":
+                            continue
+                        elif c == "<eos_stream>":
+                            break
+                        else:
+                            results.append(c)
+                    req.completion = results
+                    continue
+
                 if req.stream:
                     results = []
                     for i, c in enumerate(req.completion):