xinference 1.3.0.post1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (52)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +1 -0
  3. xinference/conftest.py +7 -0
  4. xinference/core/model.py +3 -1
  5. xinference/core/scheduler.py +3 -0
  6. xinference/core/worker.py +1 -1
  7. xinference/deploy/cmdline.py +0 -8
  8. xinference/model/embedding/core.py +12 -5
  9. xinference/model/llm/__init__.py +2 -1
  10. xinference/model/llm/core.py +13 -0
  11. xinference/model/llm/llama_cpp/core.py +260 -3
  12. xinference/model/llm/llm_family.json +306 -17
  13. xinference/model/llm/llm_family_modelscope.json +347 -28
  14. xinference/model/llm/mlx/core.py +15 -4
  15. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +1 -1
  16. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +4 -5
  17. xinference/model/llm/sglang/core.py +7 -2
  18. xinference/model/llm/transformers/chatglm.py +4 -4
  19. xinference/model/llm/transformers/core.py +22 -5
  20. xinference/model/llm/transformers/intern_vl.py +2 -1
  21. xinference/model/llm/transformers/utils.py +1 -1
  22. xinference/model/llm/utils.py +103 -67
  23. xinference/model/llm/vllm/core.py +29 -42
  24. xinference/types.py +4 -0
  25. xinference/web/ui/build/asset-manifest.json +3 -3
  26. xinference/web/ui/build/index.html +1 -1
  27. xinference/web/ui/build/static/js/main.55b70cb7.js +3 -0
  28. xinference/web/ui/build/static/js/main.55b70cb7.js.map +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/87a9b13f2466f375ae5c6e7c08b279cc38351d29710d7f7626bbb07a85262b79.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +1 -0
  36. xinference/web/ui/src/locales/en.json +9 -1
  37. xinference/web/ui/src/locales/zh.json +9 -1
  38. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/METADATA +7 -3
  39. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/RECORD +44 -43
  40. xinference/web/ui/build/static/js/main.ad42919c.js +0 -3
  41. xinference/web/ui/build/static/js/main.ad42919c.js.map +0 -1
  42. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +0 -1
  43. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +0 -1
  44. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +0 -1
  45. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +0 -1
  46. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +0 -1
  47. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +0 -1
  48. /xinference/web/ui/build/static/js/{main.ad42919c.js.LICENSE.txt → main.55b70cb7.js.LICENSE.txt} +0 -0
  49. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/LICENSE +0 -0
  50. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/WHEEL +0 -0
  51. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/entry_points.txt +0 -0
  52. {xinference-1.3.0.post1.dist-info → xinference-1.3.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/sglang/core.py CHANGED
@@ -48,6 +48,7 @@ class SGLANGModelConfig(TypedDict, total=False):
     nnodes: Optional[int]
     node_rank: Optional[int]
     dist_init_addr: Optional[str]
+    reasoning_content: bool


 class SGLANGGenerateConfig(TypedDict, total=False):
@@ -99,6 +100,7 @@ SGLANG_SUPPORTED_CHAT_MODELS = [
     "qwen2.5-instruct",
     "qwen2.5-coder-instruct",
     "QwQ-32B-Preview",
+    "QwQ-32B",
     "deepseek-r1-distill-qwen",
     "deepseek-r1-distill-llama",
     "deepseek-v3",
@@ -143,6 +145,8 @@ class SGLANGModel(LLM):
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")

         self._model_config = self._sanitize_model_config(self._model_config)
+        reasoning_content = self._model_config.pop("reasoning_content")
+        self.prepare_parse_reasoning_content(reasoning_content)

         # Fix: GH#2169
         if sgl.__version__ >= "0.2.14":
@@ -255,6 +259,7 @@ class SGLANGModel(LLM):
         else:
             model_config["mem_fraction_static"] = 0.88
         model_config.setdefault("log_level", "info")
+        model_config.setdefault("reasoning_content", False)

         return model_config

@@ -547,8 +552,8 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
         if stream:
            agen = await self.async_generate(full_prompt, generate_config)  # type: ignore
            assert isinstance(agen, AsyncGenerator)
-            return self._async_to_chat_completion_chunks(agen)
+            return self._async_to_chat_completion_chunks(agen, self.reasoning_parser)
         else:
            c = await self.async_generate(full_prompt, generate_config)  # type: ignore
            assert not isinstance(c, AsyncGenerator)
-            return self._to_chat_completion(c)
+            return self._to_chat_completion(c, self.reasoning_parser)
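Note: the SGLang hunks above add a `reasoning_content` flag to the engine's model config and route chat results through a reasoning parser. A minimal usage sketch follows, assuming the flag is forwarded as an extra launch kwarg the way other engine config options are; the endpoint URL and model name are placeholders, not taken from this diff.

```python
# Hypothetical sketch, not from the diff: enable reasoning_content at launch
# so chat responses can expose a separate "reasoning_content" field.
from xinference.client import Client

client = Client("http://localhost:9997")            # assumed local endpoint
model_uid = client.launch_model(
    model_name="deepseek-r1-distill-qwen",          # any model with reasoning ability
    model_engine="sglang",
    reasoning_content=True,                         # assumed to land in SGLANGModelConfig
)
model = client.get_model(model_uid)
result = model.chat(messages=[{"role": "user", "content": "Why is the sky blue?"}])
message = result["choices"][0]["message"]
print(message.get("reasoning_content"))             # chain-of-thought, if parsed
print(message.get("content"))                       # final answer
```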
xinference/model/llm/transformers/chatglm.py CHANGED
@@ -383,7 +383,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
            function_call = self._process_response_non_streaming(
                response, tools, use_tool=True
            )
-            return self._tool_calls_completion(
+            return self._post_process_completion(
                self.model_family, self.model_uid, function_call
            )
         else:
@@ -397,7 +397,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         prompt_tokens = len(inputs["input_ids"][0])
         for chunk_text in self._stream_chat(inputs, tools, **kwargs):
            if tools and isinstance(chunk_text, dict):
-                yield self._tool_calls_completion_chunk(
+                yield self._post_process_completion_chunk(
                    self.model_family, self.model_uid, chunk_text
                )
                return
@@ -484,7 +484,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
            function_call = self._process_response_non_streaming(
                response, req.tools, use_tool=True
            )
-            req.completion[0] = self._tool_calls_completion(
+            req.completion[0] = self._post_process_completion(
                self.model_family, self.model_uid, function_call
            )
            req.completion[0]["usage"] = usage
@@ -516,7 +516,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
                c for c in req.completion if not isinstance(c, str)
            ][0]["id"]
            results.append(
-                self._tool_calls_completion_chunk(
+                self._post_process_completion_chunk(
                    self.model_family,
                    self.model_uid,
                    new_response,
xinference/model/llm/transformers/core.py CHANGED
@@ -61,6 +61,8 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
     "deepseek-vl-chat",
     "internvl-chat",
     "internvl2",
+    "Internvl2.5",
+    "Internvl2.5-MPO",
     "cogvlm2",
     "cogvlm2-video-llama3-chat",
     "MiniCPM-Llama3-V-2_5",
@@ -112,6 +114,7 @@ class PytorchModel(LLM):
         pytorch_model_config.setdefault("trust_remote_code", True)
         pytorch_model_config.setdefault("max_num_seqs", 16)
         pytorch_model_config.setdefault("enable_tensorizer", False)
+        pytorch_model_config.setdefault("reasoning_content", False)
         return pytorch_model_config

     def _sanitize_generate_config(
@@ -324,6 +327,9 @@ class PytorchModel(LLM):
            kwargs.update({"device_map": "auto"})
            is_device_map_auto = True

+        reasoning_content = self._pytorch_model_config.pop("reasoning_content")
+        self.prepare_parse_reasoning_content(reasoning_content)
+
         if self._check_tensorizer_integrity():
            self._model, self._tokenizer = self._load_tensorizer(**kwargs)
         else:
@@ -714,23 +720,34 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):

     def handle_chat_result_non_streaming(self, req: InferenceRequest):
         if req.tools:
-            req.completion[0] = self._tool_calls_completion(
-                self.model_family, self.model_uid, req.completion[0]
+            req.completion[0] = self._post_process_completion(
+                self.model_family,
+                self.model_uid,
+                req.completion[0],
+                self.reasoning_parser,
            )
         else:
-            req.completion[0] = self._to_chat_completion(req.completion[0])
+            req.completion[0] = self._to_chat_completion(
+                req.completion[0], self.reasoning_parser
+            )

     def handle_chat_result_streaming(self, req: InferenceRequest):
         results = []
         for i, c in enumerate(req.completion):
            if c == "<bos_stream>":
                results.append(
-                    self._get_first_chat_completion_chunk(req.completion[i + 1])
+                    self._get_first_chat_completion_chunk(
+                        req.completion[i + 1], self.reasoning_parser
+                    )
                )
            elif c == "<eos_stream>":
                break
            else:
-                results.append(self._to_chat_completion_chunk(c))
+                results.append(
+                    self._to_chat_completion_chunk(
+                        c, self.reasoning_parser, req.previous_texts
+                    )
+                )

         if req.stopped and req.include_usage:
            results.append(self._get_final_chat_completion_chunk(req.completion[-1]))
xinference/model/llm/transformers/intern_vl.py CHANGED
@@ -265,7 +265,8 @@ class InternVLChatModel(PytorchChatModel):
         if world_size == 1:
            return None
         model_size = f"{self.model_spec.model_size_in_billions}B"
-        model_name = f"{self.model_family.model_name.lower()}-{model_size}"
+        model_name = self.model_family.model_name.lower().replace("-mpo", "")
+        model_name = f"{model_name}-{model_size}"
         num_layers = {
            "internvl2-1B": 24,
            "internvl2-2B": 24,
xinference/model/llm/transformers/utils.py CHANGED
@@ -132,7 +132,7 @@ def _pad_seqs_inplace(seqs: List[List[int]], reqs: List[InferenceRequest], pad:

 def get_max_src_len(context_len: int, r: InferenceRequest) -> int:
     max_new_tokens = int(
-        r.sanitized_generate_config.get("max_tokens", max_tokens_field.default)
+        r.sanitized_generate_config.get("max_tokens") or max_tokens_field.default
     )
     return context_len - max_new_tokens - 8

xinference/model/llm/utils.py CHANGED
@@ -41,6 +41,7 @@ from ...types import (
     ChatCompletion,
     ChatCompletionChoice,
     ChatCompletionChunk,
+    ChatCompletionChunkDelta,
     ChatCompletionMessage,
     Completion,
     CompletionChoice,
@@ -243,62 +244,73 @@ class ChatModelMixin:
            raise ValueError(f"Invalid model family: {model_family}")

     @classmethod
-    def _to_chat_completion_chunk(cls, chunk: CompletionChunk) -> ChatCompletionChunk:
-        choices = chunk.get("choices")
-        if (
-            chunk.get("object") == "chat.completion.chunk"
-            and choices
-            and "delta" in choices[0]
-        ):
-            # Already a ChatCompletionChunk, we don't need to convert chunk.
-            return cast(ChatCompletionChunk, chunk)
+    def _to_chat_completion_chunk(
+        cls,
+        chunk: CompletionChunk,
+        reasoning_parser: Optional[ReasoningParser] = None,
+        previous_texts: Optional[List[str]] = None,
+    ) -> ChatCompletionChunk:
+        choices_list = []
+        for i, choice in enumerate(chunk["choices"]):
+            delta = ChatCompletionChunkDelta()
+            if "text" in choice and choice["finish_reason"] is None:
+                if reasoning_parser is None:
+                    delta["content"] = choice["text"]
+                else:
+                    assert previous_texts is not None
+                    current_text = previous_texts[-1] + choice["text"]
+                    delta = reasoning_parser.extract_reasoning_content_streaming(
+                        previous_text=previous_texts[-1],
+                        current_text=current_text,
+                        delta_text=choice["text"],
+                    )
+                    previous_texts[-1] = current_text
+            if "tool_calls" in choice:
+                delta["tool_calls"] = choice["tool_calls"]
+            choices_list.append(
+                {
+                    "index": i,
+                    "delta": delta,
+                    "finish_reason": choice["finish_reason"],
+                }
+            )
         chat_chunk = {
            "id": "chat" + chunk["id"],
            "model": chunk["model"],
            "created": chunk["created"],
            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": i,
-                    "delta": {
-                        **(
-                            {"content": choice["text"]}
-                            if ("text" in choice and choice["finish_reason"] is None)
-                            else {}
-                        ),
-                        **(
-                            {"tool_calls": choice["tool_calls"]}
-                            if "tool_calls" in choice
-                            else {}
-                        ),
-                    },
-                    "finish_reason": choice["finish_reason"],
-                }
-                for i, choice in enumerate(chunk["choices"])
-            ],
+            "choices": choices_list,
         }
         return cast(ChatCompletionChunk, chat_chunk)

     @classmethod
     def _get_first_chat_completion_chunk(
-        cls, chunk: CompletionChunk
+        cls,
+        chunk: CompletionChunk,
+        reasoning_parser: Optional[ReasoningParser] = None,
     ) -> ChatCompletionChunk:
+        choices_list = []
+        for i, choice in enumerate(chunk["choices"]):
+            delta = {
+                "role": "assistant",
+            }
+            if reasoning_parser is None:
+                delta["content"] = ""
+            else:
+                delta["reasoning_content"] = ""
+            choices_list.append(
+                {
+                    "index": i,
+                    "delta": delta,
+                    "finish_reason": None,
+                }
+            )
         chat_chunk = {
            "id": "chat" + chunk["id"],
            "model": chunk["model"],
            "created": chunk["created"],
            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": i,
-                    "delta": {
-                        "role": "assistant",
-                        "content": "",
-                    },
-                    "finish_reason": None,
-                }
-                for i, choice in enumerate(chunk["choices"])
-            ],
+            "choices": choices_list,
         }
         return cast(ChatCompletionChunk, chat_chunk)

@@ -324,15 +336,18 @@
         chunks: Iterator[CompletionChunk],
         reasoning_parse: Optional[ReasoningParser] = None,
     ) -> Iterator[ChatCompletionChunk]:
+        previous_texts = [""]
         for i, chunk in enumerate(chunks):
            if i == 0:
-                yield cls._get_first_chat_completion_chunk(chunk)
+                yield cls._get_first_chat_completion_chunk(chunk, reasoning_parse)
            # usage
            choices = chunk.get("choices")
            if not choices:
                yield cls._get_final_chat_completion_chunk(chunk)
            else:
-                yield cls._to_chat_completion_chunk(chunk)
+                yield cls._to_chat_completion_chunk(
+                    chunk, reasoning_parse, previous_texts
+                )

     @classmethod
     def _tools_to_messages_for_deepseek(
@@ -370,33 +385,19 @@
         reasoning_parser: Optional[ReasoningParser] = None,
     ) -> AsyncGenerator[ChatCompletionChunk, None]:
         i = 0
-        previous_text = ""
-        current_text = ""
+        previous_texts = [""]
         async for chunk in chunks:
            if i == 0:
-                chat_chunk = cls._get_first_chat_completion_chunk(chunk)
+                chat_chunk = cls._get_first_chat_completion_chunk(
+                    chunk, reasoning_parser
+                )
            elif not chunk.get("choices"):
                # usage
                chat_chunk = cls._get_final_chat_completion_chunk(chunk)
            else:
-                chat_chunk = cls._to_chat_completion_chunk(chunk)
-                if reasoning_parser is not None:
-                    choices = chat_chunk.get("choices")
-                    if choices is None:
-                        continue
-                    for choice in choices:
-                        delta = choice.get("delta")
-                        if not delta:
-                            continue
-                        current_text = previous_text + delta.get("content", "")
-                        choice[
-                            "delta"
-                        ] = reasoning_parser.extract_reasoning_content_streaming(
-                            previous_text=previous_text,
-                            current_text=current_text,
-                            delta=delta,
-                        )
-                        previous_text = current_text
+                chat_chunk = cls._to_chat_completion_chunk(
+                    chunk, reasoning_parser, previous_texts
+                )
            yield chat_chunk
            i += 1

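With the conversion helpers above, streamed chunks carry the parsed reasoning text in delta["reasoning_content"] and the final answer in delta["content"]. A self-contained consumer-side sketch; the chunks below are fabricated to mirror the delta shapes produced by _get_first_chat_completion_chunk and _to_chat_completion_chunk, not captured from a real model:

```python
# Fabricated chunks that mimic the converted ChatCompletionChunk deltas.
fake_stream = [
    {"choices": [{"index": 0, "delta": {"role": "assistant", "reasoning_content": ""}, "finish_reason": None}]},
    {"choices": [{"index": 0, "delta": {"reasoning_content": "User asks why the sky is blue..."}, "finish_reason": None}]},
    {"choices": [{"index": 0, "delta": {"content": "Rayleigh scattering."}, "finish_reason": "stop"}]},
]

reasoning_parts, answer_parts = [], []
for chunk in fake_stream:
    for choice in chunk.get("choices", []):
        delta = choice.get("delta", {})
        if delta.get("reasoning_content"):
            reasoning_parts.append(delta["reasoning_content"])
        if delta.get("content"):
            answer_parts.append(delta["content"])

print("reasoning:", "".join(reasoning_parts))
print("answer:", "".join(answer_parts))
```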
@@ -565,7 +566,14 @@
         return result

     @classmethod
-    def _tool_calls_completion_chunk(cls, model_family, model_uid, c, chunk_id=None):
+    def _post_process_completion_chunk(
+        cls,
+        model_family,
+        model_uid,
+        c,
+        chunk_id=None,
+        reasoning_parser: Optional[ReasoningParser] = None,
+    ):
         _id = chunk_id if chunk_id is not None else str(uuid.uuid4())
         tool_result = cls._eval_tool_arguments(model_family, c)
         tool_calls = []
@@ -585,11 +593,22 @@
            else:
                failed_contents.append(content)
         finish_reason = "tool_calls" if tool_calls else "stop"
+
+        reasoning_content = None
+        content = ". ".join(failed_contents) if failed_contents else None
+        if reasoning_parser is not None:
+            reasoning_content, content = reasoning_parser.extract_reasoning_content(  # type: ignore
+                content
+            )
         d = {
            "role": "assistant",
-            "content": ". ".join(failed_contents) if failed_contents else None,
+            "content": content,
            "tool_calls": tool_calls,
         }
+        # add only reasoning_content is None
+        if reasoning_content is not None:
+            d["reasoning_content"] = reasoning_content
+
         try:
            usage = c.get("usage")
            assert "prompt_tokens" in usage
@@ -616,7 +635,13 @@
         }

     @classmethod
-    def _tool_calls_completion(cls, model_family, model_uid, c):
+    def _post_process_completion(
+        cls,
+        model_family,
+        model_uid,
+        c,
+        reasoning_parser: Optional[ReasoningParser] = None,
+    ):
         _id = str(uuid.uuid4())
         tool_result = cls._eval_tool_arguments(model_family, c)

@@ -637,11 +662,22 @@
            else:
                failed_contents.append(content)
         finish_reason = "tool_calls" if tool_calls else "stop"
+
+        reasoning_content = None
+        content = ". ".join(failed_contents) if failed_contents else None
+        if reasoning_parser is not None:
+            reasoning_content, content = reasoning_parser.extract_reasoning_content(  # type: ignore
+                content
+            )
         m = {
            "role": "assistant",
-            "content": ". ".join(failed_contents) if failed_contents else None,
+            "content": content,
            "tool_calls": tool_calls,
         }
+        # add only reasoning_content is None
+        if reasoning_content is not None:
+            m["reasoning_content"] = reasoning_content
+
         try:
            usage = c.get("usage")
            assert "prompt_tokens" in usage
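Both _post_process_completion helpers now pass the non-tool-call content through reasoning_parser.extract_reasoning_content before building the assistant message. A rough sketch of that kind of split for DeepSeek-R1-style think tags; this regex version is an illustration only, not the actual ReasoningParser implementation, and the tag strings are assumed defaults:

```python
import re

def split_reasoning(text, start_tag="<think>", end_tag="</think>"):
    # Return (reasoning, content); reasoning is None when no tags are found.
    pattern = re.escape(start_tag) + r"(.*?)" + re.escape(end_tag)
    match = re.search(pattern, text, flags=re.DOTALL)
    if not match:
        return None, text
    reasoning = match.group(1).strip()
    content = (text[: match.start()] + text[match.end():]).strip()
    return reasoning, content or None

print(split_reasoning("<think>2 + 2 = 4</think>The answer is 4."))
# -> ('2 + 2 = 4', 'The answer is 4.')
```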
xinference/model/llm/vllm/core.py CHANGED
@@ -43,8 +43,6 @@ from ....types import (
 )
 from .. import LLM, LLMFamilyV1, LLMSpecV1
 from ..llm_family import CustomLLMFamilyV1
-from ..reasoning_parsers import deepseek_r1_reasoning_parser  # noqa: F401
-from ..reasoning_parsers.abs_reasoning_parsers import ReasoningParserManager
 from ..utils import (
     DEEPSEEK_TOOL_CALL_FAMILY,
     QWEN_TOOL_CALL_FAMILY,
@@ -160,6 +158,7 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_MODELS.append("qwen2.5-coder")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-coder-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B-Preview")
+    VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B")
     VLLM_SUPPORTED_CHAT_MODELS.append("marco-o1")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-distill-qwen")

@@ -196,6 +195,7 @@ if VLLM_INSTALLED and vllm.__version__ > "0.5.3":
 if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5")
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("InternVL2.5-MPO")

 if VLLM_INSTALLED and vllm.__version__ >= "0.6.2":
     VLLM_SUPPORTED_CHAT_MODELS.append("minicpm3-4b")
@@ -211,9 +211,10 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.7.0":

 if VLLM_INSTALLED and vllm.__version__ >= "0.7.2":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("moonlight-16b-a3b-instruct")

 if VLLM_INSTALLED and vllm.__version__ >= "0.7.3":
-    VLLM_SUPPORTED_CHAT_MODELS.append("qwen-2.5-instruct-1m")
+    VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct-1m")


 class VLLMModel(LLM):
@@ -243,7 +244,6 @@ class VLLMModel(LLM):
         self.lora_modules = peft_model
         self.lora_requests: List[LoRARequest] = []
         self._xavier_config = None
-        self.reasoning_parser = None

     def set_xavier_config(self, value: Optional[Dict]):
         self._xavier_config = value  # type: ignore
@@ -274,14 +274,8 @@
         self._model_config = self._sanitize_model_config(self._model_config)
         reasoning_content = self._model_config.pop("reasoning_content")

-        # Initialize reasoning parser if model has reasoning ability
-        if "reasoning" in self.model_family.model_ability and reasoning_content:
-            module_name = self.model_family.model_family or self.model_family.model_name
-            self.reasoning_parser = ReasoningParserManager.get_parser(module_name)
-            self.reasoning_parser = self.reasoning_parser(
-                self.model_family.reasoning_start_tag,
-                self.model_family.reasoning_end_tag,
-            )
+        self.prepare_parse_reasoning_content(reasoning_content)
+
         if self.lora_modules is None:
            self.lora_requests = []
         else:
@@ -581,6 +575,8 @@
            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")

         sanitized_generate_config = self._sanitize_generate_config(generate_config)
+        if self.reasoning_parser:
+            sanitized_generate_config.pop("stop")
         logger.debug(
            "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
         )
@@ -812,18 +808,23 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         i = 0
         async for chunk in chunks:
            if i == 0:
-                yield self._get_first_chat_completion_chunk(chunk)
+                yield self._get_first_chat_completion_chunk(
+                    chunk, self.reasoning_parser
+                )
            # usage
            choices = chunk.get("choices")
            if not choices:
                yield self._get_final_chat_completion_chunk(chunk)
            else:
                if self.is_tool_call_chunk(chunk):
-                    yield self._tool_calls_completion_chunk(
-                        self.model_family, self.model_uid, chunk
+                    yield self._post_process_completion_chunk(
+                        self.model_family,
+                        self.model_uid,
+                        chunk,
+                        reasoning_parser=self.reasoning_parser,
                    )
                else:
-                    yield self._to_chat_completion_chunk(chunk)
+                    yield self._to_chat_completion_chunk(chunk, self.reasoning_parser)
            i += 1

     @vllm_check
@@ -863,7 +864,9 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         )
         assert not isinstance(c, AsyncGenerator)
         if tools:
-            return self._tool_calls_completion(self.model_family, self.model_uid, c)
+            return self._post_process_completion(
+                self.model_family, self.model_uid, c, self.reasoning_parser
+            )
         return self._to_chat_completion(c, self.reasoning_parser)


@@ -905,31 +908,15 @@ class VLLMVisionModel(VLLMModel, ChatModelMixin):
     def _sanitize_model_config(
         self, model_config: Optional[VLLMModelConfig]
     ) -> VLLMModelConfig:
-        if model_config is None:
-            model_config = VLLMModelConfig()
-
-        cuda_count = self._get_cuda_count()
-
-        model_config.setdefault("tokenizer_mode", "auto")
-        model_config.setdefault("trust_remote_code", True)
-        model_config.setdefault("tensor_parallel_size", cuda_count)
-        model_config.setdefault("block_size", 16)
-        model_config.setdefault("swap_space", 4)
-        model_config.setdefault("gpu_memory_utilization", 0.90)
-        model_config.setdefault("max_num_seqs", 256)
-        model_config.setdefault("quantization", None)
-        model_config.setdefault("max_model_len", None)
-        model_config["limit_mm_per_prompt"] = (
-            json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
-            if model_config.get("limit_mm_per_prompt")
-            else {
-                "image": 2,  # default 2 images all chat
-            }
-        )
-        # Add scheduling policy if vLLM version is 0.6.3 or higher
-        if vllm.__version__ >= "0.6.3":
-            model_config.setdefault("scheduling_policy", "fcfs")
-
+        model_config = super()._sanitize_model_config(model_config)
+        if vllm.__version__ >= "0.5.5":
+            model_config["limit_mm_per_prompt"] = (
+                json.loads(model_config.get("limit_mm_per_prompt"))  # type: ignore
+                if model_config.get("limit_mm_per_prompt")
+                else {
+                    "image": 2,  # default 2 images all chat
+                }
+            )
         return model_config

     def _sanitize_chat_config(
xinference/types.py CHANGED
@@ -78,6 +78,7 @@ class EmbeddingData(TypedDict):
 class Embedding(TypedDict):
     object: Literal["list"]
     model: str
+    model_replica: str
     data: List[EmbeddingData]
     usage: EmbeddingUsage

@@ -276,6 +277,7 @@ class LlamaCppModelConfig(TypedDict, total=False):
     use_mmap: bool
     use_mlock: bool
     n_threads: Optional[int]
+    n_parallel: Optional[int]
     n_batch: int
     last_n_tokens_size: int
     lora_base: Optional[str]
@@ -284,6 +286,7 @@
     n_gqa: Optional[int]  # (TEMPORARY) must be 8 for llama2 70b
     rms_norm_eps: Optional[float]  # (TEMPORARY)
     verbose: bool
+    reasoning_content: bool


 class PytorchGenerateConfig(TypedDict, total=False):
@@ -330,6 +333,7 @@ class PytorchModelConfig(TypedDict, total=False):
     trust_remote_code: bool
     max_num_seqs: int
     enable_tensorizer: Optional[bool]
+    reasoning_content: bool


 def get_pydantic_model_from_method(
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.f8177338.css",
-    "main.js": "./static/js/main.ad42919c.js",
+    "main.js": "./static/js/main.55b70cb7.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.f8177338.css.map": "./static/css/main.f8177338.css.map",
-    "main.ad42919c.js.map": "./static/js/main.ad42919c.js.map"
+    "main.55b70cb7.js.map": "./static/js/main.55b70cb7.js.map"
   },
   "entrypoints": [
     "static/css/main.f8177338.css",
-    "static/js/main.ad42919c.js"
+    "static/js/main.55b70cb7.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.ad42919c.js"></script><link href="./static/css/main.f8177338.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.55b70cb7.js"></script><link href="./static/css/main.f8177338.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>