xinference 1.10.1__py3-none-any.whl → 1.11.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (39)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +462 -3
  3. xinference/client/restful/async_restful_client.py +158 -5
  4. xinference/client/restful/restful_client.py +131 -0
  5. xinference/core/supervisor.py +12 -0
  6. xinference/model/audio/model_spec.json +20 -20
  7. xinference/model/image/model_spec.json +159 -159
  8. xinference/model/llm/__init__.py +2 -2
  9. xinference/model/llm/llm_family.json +843 -180
  10. xinference/model/llm/mlx/distributed_models/core.py +41 -0
  11. xinference/model/llm/mlx/distributed_models/qwen2.py +1 -2
  12. xinference/model/llm/sglang/core.py +20 -6
  13. xinference/model/llm/tool_parsers/qwen_tool_parser.py +29 -4
  14. xinference/model/llm/transformers/chatglm.py +3 -0
  15. xinference/model/llm/transformers/core.py +93 -16
  16. xinference/model/llm/transformers/multimodal/minicpmv45.py +340 -0
  17. xinference/model/llm/transformers/utils.py +3 -0
  18. xinference/model/llm/utils.py +37 -24
  19. xinference/model/llm/vllm/core.py +128 -69
  20. xinference/model/utils.py +74 -31
  21. xinference/thirdparty/audiotools/core/audio_signal.py +6 -6
  22. xinference/thirdparty/fish_speech/fish_speech/text/chn_text_norm/text.py +1 -1
  23. xinference/thirdparty/melo/text/chinese_mix.py +2 -2
  24. xinference/types.py +9 -0
  25. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  26. xinference/ui/web/ui/build/index.html +1 -1
  27. xinference/ui/web/ui/build/static/js/{main.d192c4f3.js → main.e4d9a9e1.js} +3 -3
  28. xinference/ui/web/ui/build/static/js/{main.d192c4f3.js.map → main.e4d9a9e1.js.map} +1 -1
  29. xinference/ui/web/ui/node_modules/.cache/babel-loader/e6770a05771952175c9fbf48fce283c9bb1bc8b5763e39edc36d099d1fe16b4a.json +1 -0
  30. xinference/ui/web/ui/node_modules/.cache/babel-loader/ea2a26361204e70cf1018d6990fb6354bed82b3ac69690391e0f100385e7abb7.json +1 -0
  31. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/METADATA +8 -5
  32. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/RECORD +37 -36
  33. xinference/ui/web/ui/node_modules/.cache/babel-loader/7275b67c78ec76ce38a686bb8a576d8c9cecf54e1573614c84859d538efb9be5.json +0 -1
  34. xinference/ui/web/ui/node_modules/.cache/babel-loader/f995a2425dfb0822fd07127f66ffe9b026883bc156b402eb8bd0b83d52460a93.json +0 -1
  35. /xinference/ui/web/ui/build/static/js/{main.d192c4f3.js.LICENSE.txt → main.e4d9a9e1.js.LICENSE.txt} +0 -0
  36. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/WHEEL +0 -0
  37. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/entry_points.txt +0 -0
  38. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/licenses/LICENSE +0 -0
  39. {xinference-1.10.1.dist-info → xinference-1.11.0.post1.dist-info}/top_level.txt +0 -0
xinference/model/llm/mlx/distributed_models/core.py
@@ -162,3 +162,44 @@ class DistributedModelMixin:
         self.layers = self.layers[: self.end_idx]
         self.layers[: self.start_idx] = [None] * self.start_idx
         self.num_layers = len(self.layers) - self.start_idx
+
+
+class SafeKVCache:
+    """
+    A safe wrapper around mlx_lm's KVCache that handles None keys gracefully.
+    This is needed because mlx_lm's generate function accesses cache.state
+    before the cache is properly initialized.
+    """
+
+    def __init__(self):
+        from mlx_lm.models.cache import KVCache
+
+        self._cache = KVCache()
+
+    @property
+    def state(self):
+        # Safe access to state property
+        if self._cache.keys is None:
+            return None, None
+        if self._cache.offset == self._cache.keys.shape[2]:
+            return self._cache.keys, self._cache.values
+        else:
+            return (
+                self._cache.keys[..., : self._cache.offset, :],
+                self._cache.values[..., : self._cache.offset, :],
+            )
+
+    @state.setter
+    def state(self, v):
+        # Safe setter for state property
+        if v is None or v[0] is None:
+            self._cache.keys = None
+            self._cache.values = None
+            self._cache.offset = 0
+        else:
+            self._cache.keys, self._cache.values = v
+            self._cache.offset = self._cache.keys.shape[2]
+
+    def __getattr__(self, name):
+        # Delegate all other attributes and methods to the underlying cache
+        return getattr(self._cache, name)
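
For illustration only (not part of the diff), a minimal sketch of how SafeKVCache is expected to behave as a drop-in for mlx_lm's KVCache, based on the code above:

    cache = SafeKVCache()
    keys, values = cache.state   # (None, None) on a fresh cache, instead of failing on keys.shape
    cache.state = (None, None)   # resets the wrapped cache: keys/values cleared, offset back to 0
    print(cache.offset)          # any other attribute is delegated to the wrapped KVCache via __getattr__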
xinference/model/llm/mlx/distributed_models/qwen2.py
@@ -46,11 +46,10 @@ class Qwen2Model(_Qwen2Model, DistributedModelMixin):
 
         pipeline_rank = self.rank
         pipeline_size = self.world_size
-        if mask is None:
-            mask = create_attention_mask(h, cache)
 
         if cache is None:
            cache = [None] * self.num_layers
+        mask = create_attention_mask(h, cache[0])
 
         # Receive from the previous process in the pipeline
 
xinference/model/llm/sglang/core.py
@@ -362,9 +362,16 @@ class SGLANGModel(LLM):
     def _convert_state_to_completion_chunk(
         request_id: str, model: str, output_text: str, meta_info: Dict
     ) -> CompletionChunk:
-        finish_reason = meta_info.get("finish_reason", None)
-        if isinstance(finish_reason, dict) and "type" in finish_reason:
-            finish_reason = finish_reason["type"]
+        finish_reason_raw = meta_info.get("finish_reason", None)
+        finish_reason: Optional[str] = None
+        if isinstance(finish_reason_raw, dict) and "type" in finish_reason_raw:
+            finish_reason = (
+                str(finish_reason_raw["type"])
+                if finish_reason_raw["type"] is not None
+                else None
+            )
+        elif isinstance(finish_reason_raw, str):
+            finish_reason = finish_reason_raw
         choices: List[CompletionChoice] = [
             CompletionChoice(
                 text=output_text,
@@ -392,9 +399,16 @@ class SGLANGModel(LLM):
     def _convert_state_to_completion(
         request_id: str, model: str, output_text: str, meta_info: Dict
     ) -> Completion:
-        finish_reason = meta_info.get("finish_reason", None)
-        if isinstance(finish_reason, dict) and "type" in finish_reason:
-            finish_reason = finish_reason["type"]
+        finish_reason_raw = meta_info.get("finish_reason", None)
+        finish_reason: Optional[str] = None
+        if isinstance(finish_reason_raw, dict) and "type" in finish_reason_raw:
+            finish_reason = (
+                str(finish_reason_raw["type"])
+                if finish_reason_raw["type"] is not None
+                else None
+            )
+        elif isinstance(finish_reason_raw, str):
+            finish_reason = finish_reason_raw
         choices = [
             CompletionChoice(
                 text=output_text,
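
For illustration only (not part of the diff), the normalization above maps SGLang's meta_info into an Optional[str] finish reason:

    meta_info = {"finish_reason": {"type": "stop"}}   # -> finish_reason == "stop"
    meta_info = {"finish_reason": "length"}           # -> finish_reason == "length"
    meta_info = {}                                    # -> finish_reason is None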
xinference/model/llm/tool_parsers/qwen_tool_parser.py
@@ -59,10 +59,28 @@ class QwenToolParser(ToolParser):
         Returns:
             str: Extracted JSON string or original string if no match found.
         """
+        # First try to find complete tool calls
         function_calls = self.tool_call_complete_regex.findall(function_call_str)
-        if len(function_calls) == 0:
-            return function_call_str
-        return function_calls[-1]
+        if len(function_calls) > 0:
+            return function_calls[-1]
+
+        # If no complete tool calls found, try to extract from incomplete tool calls
+        # Handle cases like <tool_call><tool_call>_city
+        if self.tool_call_start_token in function_call_str:
+            # Extract content between the last tool_call start token and end of string
+            last_start = function_call_str.rfind(self.tool_call_start_token)
+            potential_json = function_call_str[
+                last_start + len(self.tool_call_start_token) :
+            ]
+            # Remove any trailing tool_call end tokens
+            if self.tool_call_end_token in potential_json:
+                potential_json = potential_json.split(self.tool_call_end_token)[0]
+            # Clean up any extra whitespace
+            potential_json = potential_json.strip()
+            if potential_json:
+                return potential_json
+
+        return function_call_str
 
     def _parse_json_function_call_stream(
         self,
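
For illustration only (not part of the diff), and assuming Qwen's default <tool_call> / </tool_call> marker tokens, the fallback added above lets a truncated call still yield its JSON payload:

    # complete call: the last <tool_call>...</tool_call> block is extracted
    '<tool_call>{"name": "get_weather", "arguments": {"city": "Paris"}}</tool_call>'
    #   -> '{"name": "get_weather", "arguments": {"city": "Paris"}}'
    # truncated call: everything after the last start token is returned, stripped
    '<tool_call>{"name": "get_weather"'
    #   -> '{"name": "get_weather"'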
@@ -229,7 +247,14 @@ class QwenToolParser(ToolParser):
             try:
                 parsed_json = self._parse_json_function_call(function_call)
                 res = json.loads(parsed_json, strict=False)
-                results.append((None, res["name"], res["arguments"]))
+                # Validate that we have the required fields
+                if "name" in res and "arguments" in res:
+                    results.append((None, res["name"], res["arguments"]))
+                else:
+                    logger.warning(
+                        "Invalid tool call format, missing required fields: %s", res
+                    )
+                    results.append((function_call, None, None))
             except Exception as e:
                 logger.error(
                     "Can't parse single qwen tool call output: %s. Error: %s",
xinference/model/llm/transformers/chatglm.py
@@ -472,6 +472,9 @@ class ChatglmPytorchChatModel(PytorchChatModel):
             r.prompt = self._process_messages(
                 r.prompt, tools=tools, tool_choice=tool_choice
             )
+            assert isinstance(
+                r.prompt, list
+            ), "r.prompt must be a list after processing"
             r.full_prompt = self.get_full_context(
                 r.prompt,
                 self.model_family.chat_template,  # type: ignore
xinference/model/llm/transformers/core.py
@@ -48,6 +48,7 @@ from ..utils import (
 )
 from .utils import (
     _get_pad_param,
+    convert_to_cache_cls,
     get_context_length,
     get_max_src_len,
     pad_prefill_tokens,
@@ -573,6 +574,7 @@ class PytorchModel(LLM):
                 ]
             )
             data.append(x)
+
         return torch.stack(data).to(self._device)
 
     def build_prefill_position_ids(
@@ -713,30 +715,105 @@ class PytorchModel(LLM):
         from torch.nn.functional import pad
         from transformers import DynamicCache
 
+        # Handle case where past_cache is None
+        if past_cache is None:
+            return new_cache
+
+        # Convert both caches to DynamicCache if not already
+        if not isinstance(past_cache, DynamicCache):
+            past_cache = convert_to_cache_cls(past_cache)
+        if not isinstance(new_cache, DynamicCache):
+            new_cache = convert_to_cache_cls(new_cache)
+
         _, seq_len_idx = self.get_batch_size_and_seq_len_indexes_from_kv()
-        past_seq_len = past_cache[0][0].shape[seq_len_idx]
-        new_seq_len = new_cache[0][0].shape[seq_len_idx]
+
+        # Handle empty caches
+        if len(past_cache) == 0:
+            return new_cache
+        if len(new_cache) == 0:
+            return past_cache
+
+        # Get first layer seq_len safely
+        past_first = past_cache[0] if len(past_cache) > 0 else (None, None)
+        new_first = new_cache[0] if len(new_cache) > 0 else (None, None)
+
+        if past_first[0] is None or past_first[1] is None:
+            return new_cache
+        if new_first[0] is None or new_first[1] is None:
+            return past_cache
+
+        past_seq_len = past_first[0].shape[seq_len_idx]
+        new_seq_len = new_first[0].shape[seq_len_idx]
+
+        # Pad the shorter cache
         if past_seq_len != new_seq_len:
-            padding_target = new_cache if past_seq_len > new_seq_len else past_cache
-            padding_len = abs(past_seq_len - new_seq_len)
+            if past_seq_len > new_seq_len:
+                padding_target = new_cache
+                padding_len = past_seq_len - new_seq_len
+            else:
+                padding_target = past_cache
+                padding_len = new_seq_len - past_seq_len
+
             pad_param = _get_pad_param(seq_len_idx, padding_len)
             for idx in range(len(padding_target)):
                 k = padding_target.key_cache[idx]
                 v = padding_target.value_cache[idx]
-                _k = pad(k, pad_param)
-                _v = pad(v, pad_param)
-                padding_target.key_cache[idx] = _k
-                padding_target.value_cache[idx] = _v
+                if k is not None and v is not None:
+                    padding_target.key_cache[idx] = pad(k, pad_param)
+                    padding_target.value_cache[idx] = pad(v, pad_param)
 
+        # Merge caches
         ret_kv = DynamicCache()
-        for idx in range(len(past_cache)):
-            k1, k2 = new_cache.key_cache[idx], past_cache.key_cache[idx]
-            v1, v2 = new_cache.value_cache[idx], past_cache.value_cache[idx]
-            ret_kv.update(
-                torch.cat((k1, k2), 0).contiguous(),
-                torch.cat((v1, v2), 0).contiguous(),
-                idx,
-            )
+        max_layers = max(len(past_cache), len(new_cache))
+
+        for idx in range(max_layers):
+            past_k = past_cache.key_cache[idx] if idx < len(past_cache) else None
+            past_v = past_cache.value_cache[idx] if idx < len(past_cache) else None
+            new_k = new_cache.key_cache[idx] if idx < len(new_cache) else None
+            new_v = new_cache.value_cache[idx] if idx < len(new_cache) else None
+
+            if past_k is not None and new_k is not None:
+                # Both layers exist - validate tensor dimensions before concatenation
+                if past_k.dim() != new_k.dim():
+                    logger.error(
+                        f"KV cache tensor dimension mismatch at layer {idx}: "
+                        f"past_k.dim()={past_k.dim()}, new_k.dim()={new_k.dim()}"
+                    )
+                    # Use the cache with higher batch size
+                    if past_k.shape[0] >= new_k.shape[0]:
+                        ret_kv.update(past_k, past_v, idx)
+                    else:
+                        ret_kv.update(new_k, new_v, idx)
+                    continue
+
+                if past_k.shape[1:] == new_k.shape[1:]:
+                    # Shapes are compatible, concatenate along batch dimension
+                    ret_kv.update(
+                        torch.cat((new_k, past_k), 0).contiguous(),
+                        torch.cat((new_v, past_v), 0).contiguous(),
+                        idx,
+                    )
+                else:
+                    # Detailed logging for shape mismatch
+                    logger.warning(
+                        f"KV cache shape mismatch at layer {idx}: "
+                        f"past_k.shape={past_k.shape}, new_k.shape={new_k.shape}. "
+                        f"This may be due to inconsistent batch sizes in continuous batching."
+                    )
+
+                    # Choose the cache with larger batch size to preserve more data
+                    if past_k.shape[0] >= new_k.shape[0]:
+                        ret_kv.update(past_k, past_v, idx)
+                    else:
+                        ret_kv.update(new_k, new_v, idx)
+            elif past_k is not None:
+                ret_kv.update(past_k, past_v, idx)
+            elif new_k is not None:
+                ret_kv.update(new_k, new_v, idx)
+            else:
+                # both None, fill with None
+                ret_kv.update(None, None, idx)
+
         return ret_kv
 
     def prepare_batch_inference(self, req_list: List[InferenceRequest]):
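
For illustration only (not part of the diff), a minimal sketch of the pad-then-concatenate step above, assuming KV tensors shaped [batch, heads, seq_len, head_dim] with seq_len_idx == 2:

    import torch
    from torch.nn.functional import pad

    past_k = torch.zeros(2, 8, 12, 64)        # decode cache, seq_len 12
    new_k = torch.zeros(1, 8, 7, 64)          # prefill cache, seq_len 7
    new_k = pad(new_k, (0, 0, 0, 12 - 7))     # pad the shorter cache along seq_len -> (1, 8, 12, 64)
    merged_k = torch.cat((new_k, past_k), 0)  # concatenate along batch -> (3, 8, 12, 64)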
xinference/model/llm/transformers/multimodal/minicpmv45.py (new file)
@@ -0,0 +1,340 @@
+# Copyright 2022-2025 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+
+import torch
+from PIL import Image
+
+from .....core.model import register_batching_multimodal_models
+from .....model.utils import select_device
+from .....types import PytorchModelConfig
+from ....scheduler.request import InferenceRequest
+from ...llm_family import LLMFamilyV2, LLMSpecV1, register_transformer
+from ...utils import _decode_image, parse_messages
+from ..core import register_non_default_model
+from .core import PytorchMultiModalModel
+
+logger = logging.getLogger(__name__)
+
+
+@register_batching_multimodal_models("MiniCPM-V-4.5")
+@register_transformer
+@register_non_default_model("MiniCPM-V-4.5")
+class MiniCPMV45Model(PytorchMultiModalModel):
+    @classmethod
+    def match_json(
+        cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        family = model_family.model_family or model_family.model_name
+        if "MiniCPM-V-4.5".lower() in family.lower():
+            return True
+        return False
+
+    def _sanitize_model_config(
+        self, pytorch_model_config: Optional[PytorchModelConfig]
+    ) -> PytorchModelConfig:
+        pytorch_model_config = super()._sanitize_model_config(pytorch_model_config)
+        assert pytorch_model_config is not None
+        # Configure pixel parameters for MiniCPM-V-4.5
+        pytorch_model_config.setdefault("min_pixels", 256 * 28 * 28)
+        pytorch_model_config.setdefault("max_pixels", 1280 * 28 * 28)
+        return pytorch_model_config
+
+    def decide_device(self):
+        device = self._pytorch_model_config.get("device", "auto")
+        self._device = select_device(device)
+        self._device = (
+            "auto"
+            if self._device == "cuda" and self.quantization is None
+            else self._device
+        )
+
+    def load_processor(self):
+        from transformers import AutoProcessor, AutoTokenizer
+
+        min_pixels = self._pytorch_model_config.get("min_pixels")
+        max_pixels = self._pytorch_model_config.get("max_pixels")
+        self._processor = AutoProcessor.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+            min_pixels=min_pixels,
+            max_pixels=max_pixels,
+        )
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            self.model_path, trust_remote_code=True
+        )
+
+    def load_multimodal_model(self):
+        from transformers import AutoModel
+        from transformers.generation import GenerationConfig
+
+        if "int4" in self.model_path:
+            model = AutoModel.from_pretrained(self.model_path, trust_remote_code=True)
+        else:
+            kwargs = self.apply_bnb_quantization()
+            model = AutoModel.from_pretrained(
+                self.model_path,
+                trust_remote_code=True,
+                torch_dtype=torch.float16,
+                device_map=self._device,
+                **kwargs,
+            )
+        self._model = model.eval()
+        # Specify hyperparameters for generation
+        self._model.generation_config = GenerationConfig.from_pretrained(
+            self.model_path,
+            trust_remote_code=True,
+        )
+        self._device = self._model.device
+
+    def _message_content_to_chat(self, content):
+        MAX_NUM_FRAMES = 64
+
+        def encode_video(video_path):
+            from decord import VideoReader, cpu
+
+            def uniform_sample(l, n):
+                gap = len(l) / n
+                idxs = [int(i * gap + gap / 2) for i in range(n)]
+                return [l[i] for i in idxs]
+
+            vr = VideoReader(video_path, ctx=cpu(0))
+            sample_fps = round(vr.get_avg_fps() / 1)  # FPS
+            frame_idx = [i for i in range(0, len(vr), sample_fps)]
+            if len(frame_idx) > MAX_NUM_FRAMES:
+                frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+            frames = vr.get_batch(frame_idx).asnumpy()
+            frames = [Image.fromarray(v.astype("uint8")) for v in frames]
+            logger.info(
+                f"Num frames: {len(frames)} when decoding video for {self.model_uid}"
+            )
+            return frames
+
+        def _load_video(_url):
+            frames = None
+            if _url.startswith("data:"):
+                raise RuntimeError("Only video url format is supported")
+            else:
+                frames = encode_video(_url)
+            return frames
+
+        if not isinstance(content, str):
+            texts = []
+            image_urls = []
+            video_urls = []
+            for c in content:
+                c_type = c.get("type")
+                if c_type == "text":
+                    texts.append(c["text"])
+                elif c_type == "image_url":
+                    image_urls.append(c["image_url"]["url"])
+                elif c_type == "video_url":
+                    video_urls.append(c["video_url"]["url"])
+            image_futures = []
+            with ThreadPoolExecutor() as executor:
+                for image_url in image_urls:
+                    fut = executor.submit(_decode_image, image_url)
+                    image_futures.append(fut)
+            images = [fut.result() for fut in image_futures]
+            frames = []
+            if len(video_urls) > 1:
+                raise RuntimeError("Only one video per message is supported")
+            for v in video_urls:
+                frames = _load_video(v)
+            text = " ".join(texts)
+            return text, images, frames
+        return content, [], []
+
+    def _convert_to_specific_style(self, messages: List[Dict]) -> Tuple:
+        video_existed = False
+        prompt, _, chat_history = parse_messages(messages)
+
+        content, images_chat, video_frames = self._message_content_to_chat(prompt)
+        if len(video_frames) > 0:
+            video_existed = True
+            images_chat = video_frames
+
+        msgs = []
+        query_to_response: List[Dict] = []
+        for h in chat_history or []:
+            images_history = []
+            role = h["role"]
+            content_h, images_tmp, video_frames_h = self._message_content_to_chat(
+                h["content"]
+            )
+            if images_tmp != []:
+                images_history = images_tmp
+            if len(video_frames_h) > 0:
+                video_existed = True
+                images_history = video_frames_h
+            if len(query_to_response) == 0 and role == "user":
+                query_to_response.append(
+                    {"role": "user", "content": images_history + [content_h]}
+                )
+            if len(query_to_response) == 1 and role == "assistant":
+                query_to_response.append(
+                    {"role": "assistant", "content": images_history + [content_h]}
+                )
+            if len(query_to_response) == 2:
+                msgs.extend(query_to_response)
+                query_to_response = []
+        msgs.append({"role": "user", "content": images_chat + [content]})
+        return msgs, video_existed
+
+    def build_inputs_from_messages(
+        self,
+        messages: List[Dict],
+        generate_config: Dict,
+    ):
+        msgs, video_existed = self._convert_to_specific_style(messages)
+        # Set decode params for video
+        params = {}
+        if video_existed:
+            params = {"use_image_id": False, "max_slice_nums": 1}
+        return dict(msgs=msgs, image=None, **params)
+
+    def build_generate_kwargs(
+        self,
+        generate_config: Dict,
+    ) -> Dict[str, Any]:
+        return dict(**generate_config)
+
+    def build_streaming_iter(
+        self,
+        messages: List[Dict],
+        generate_config: Dict,
+    ) -> Tuple[Iterator, int]:
+        inputs = self.build_inputs_from_messages(messages, generate_config)
+        config = self.build_generate_kwargs(generate_config)
+        chat_iter = self._model.chat(
+            **inputs, **config, tokenizer=self._tokenizer, sampling=True
+        )
+
+        return chat_iter, -1
+
+    def prepare_sanitize_generate_config(self, req: InferenceRequest):
+        """
+        Refer to MiniCPM-V-4.5 documentation for generation parameters
+        """
+        raw_config = req.inference_kwargs.get("raw_params", {})
+        temperature = raw_config.get("temperature", None)
+        if temperature is None:
+            raw_config["temperature"] = 0.7
+        top_p = raw_config.get("top_p", None)
+        if top_p is None:
+            raw_config["top_p"] = 0.8
+        top_k = raw_config.get("top_k", None)
+        if top_k is None:
+            raw_config["top_k"] = 100
+        repetition_penalty = raw_config.get("repetition_penalty", None)
+        if repetition_penalty is None:
+            raw_config["repetition_penalty"] = 1.05
+        return raw_config
+
+    def _handle_input_ids_and_images(self, msgs: List[Dict]) -> Dict:
+        """
+        Handle input IDs and images for MiniCPM-V-4.5
+        Based on MiniCPM-V-2.6 implementation with adaptations for 4.5
+        """
+        from copy import deepcopy
+
+        copy_msgs = deepcopy(msgs)
+
+        images = []
+        for i, msg in enumerate(copy_msgs):
+            role = msg["role"]
+            content = msg["content"]
+            assert role in ["user", "assistant"]
+            if i == 0:
+                assert role == "user", "The role of first msg should be user"
+            if isinstance(content, str):
+                content = [content]
+            cur_msgs = []
+            for c in content:
+                if isinstance(c, Image.Image):
+                    images.append(c)
+                    cur_msgs.append("(<image>./</image>)")
+                elif isinstance(c, str):
+                    cur_msgs.append(c)
+            msg["content"] = "\n".join(cur_msgs)
+
+        return {
+            "prompt": self._processor.tokenizer.apply_chat_template(
+                copy_msgs, tokenize=False, add_generation_prompt=True
+            ),
+            "input_image": images,
+        }
+
+    def _get_full_prompt(self, messages: List[Dict], tools, generate_config: dict):  # type: ignore
+        msgs, video_existed = self._convert_to_specific_style(messages)
+        if video_existed:
+            raise RuntimeError(
+                f"Continuous batching does not support video inputs for this model: {self.model_uid}"
+            )
+        return self._handle_input_ids_and_images(msgs)
+
+    def build_prefill_kwargs(self, prompts: List, req_list: List[InferenceRequest]):
+        prompts_lists = [x["prompt"] for x in prompts]
+        input_images_lists = [x["input_image"] for x in prompts]
+        inputs = self._processor(
+            prompts_lists,
+            input_images_lists,
+            max_slice_nums=None,
+            use_image_id=None,
+            return_tensors="pt",
+            max_length=8192,
+        ).to(self._model.device)
+        inputs.pop("image_sizes")
+
+        masked_input_ids = inputs["input_ids"] * inputs["attention_mask"]
+        for i in range(masked_input_ids.shape[0]):
+            non_zero_values = masked_input_ids[i][masked_input_ids[i] != 0].tolist()
+            req_list[i].prompt_tokens = non_zero_values
+            req_list[i].extra_kwargs["attention_mask_seq_len"] = len(non_zero_values)
+            req_list[i].padding_len = masked_input_ids.shape[1] - len(non_zero_values)
+
+        model_inputs = {
+            "input_ids": inputs["input_ids"],
+            "image_bound": inputs["image_bound"],
+            "pixel_values": inputs["pixel_values"],
+            "tgt_sizes": inputs["tgt_sizes"],
+        }
+        model_inputs["inputs_embeds"], _ = self._model.get_vllm_embedding(model_inputs)
+
+        return {
+            "inputs_embeds": model_inputs["inputs_embeds"],
+            "attention_mask": inputs["attention_mask"],
+        }
+
+    def build_decode_position_ids(
+        self, batch_size: int, seq_length: int, reqs: List[InferenceRequest]
+    ):
+        return None
+
+    def batch_inference(self, req_list: List[InferenceRequest]):
+        """
+        This method is rewritten
+        because the specific inference process is performed by `self._model.llm`,
+        not `self._model` itself
+        """
+        from ..utils import batch_inference_one_step
+
+        self.prepare_batch_inference(req_list)
+        batch_inference_one_step(
+            self, req_list, self.model_uid, self._model.llm, self._tokenizer
+        )
+        self.handle_batch_inference_results(req_list)
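
For context (not part of the diff), the new class registers MiniCPM-V-4.5 with the Transformers engine, so it can be launched like other built-in multimodal LLMs. A rough sketch via the RESTful client, where the endpoint and the exact engine spelling are assumptions rather than taken from this diff:

    from xinference.client import Client

    client = Client("http://localhost:9997")   # placeholder endpoint
    model_uid = client.launch_model(
        model_name="MiniCPM-V-4.5",
        model_type="LLM",
        model_engine="transformers",           # assumed engine identifier
    )
    model = client.get_model(model_uid)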
xinference/model/llm/transformers/utils.py
@@ -281,7 +281,10 @@ def _batch_inference_one_step_internal(
            r.append_new_token(token)
 
     if decode_reqs:
+        # Ensure all decode requests have the same kv_cache reference
+        # This prevents batch size mismatches during merging
         decode_kv = decode_reqs[0].kv_cache
+
         # prefill and decode kv cache need to be merged at `batch_size` and `seq_len` dimensions.
         merged_kv_cache = xinf_model_obj.merge_kv_cache(decode_kv, past_key_values)
         for r in valid_req_list: