xinference 1.3.1__py3-none-any.whl → 1.3.1.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/core/chat_interface.py +39 -24
- xinference/model/llm/core.py +2 -5
- xinference/model/llm/llama_cpp/core.py +9 -3
- xinference/model/llm/llm_family.json +93 -9
- xinference/model/llm/llm_family_modelscope.json +10 -10
- xinference/model/llm/{reasoning_parsers/deepseek_r1_reasoning_parser.py → reasoning_parser.py} +5 -8
- xinference/model/llm/utils.py +41 -3
- xinference/model/llm/vllm/core.py +2 -0
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/METADATA +3 -3
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/RECORD +15 -17
- xinference/model/llm/reasoning_parsers/__init__.py +0 -13
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +0 -98
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/LICENSE +0 -0
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/WHEEL +0 -0
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/entry_points.txt +0 -0
- {xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED

@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2025-03-
+ "date": "2025-03-11T12:00:36+0800",
 "dirty": false,
 "error": null,
- "full-revisionid": "
- "version": "1.3.1"
+ "full-revisionid": "2ef99fbb5450a76a6ba07a909f58b8c2e4c22a28",
+ "version": "1.3.1.post1"
 }
 '''  # END VERSION_JSON

xinference/core/chat_interface.py
CHANGED

@@ -113,6 +113,7 @@ class GradioInterface:
             max_tokens: int,
             temperature: float,
             lora_name: str,
+            stream: bool,
         ) -> Generator:
             from ..client import RESTfulClient

@@ -123,29 +124,40 @@ class GradioInterface:
             messages = to_chat(flatten(history))
             messages.append(dict(role="user", content=message))

-            … (23 removed lines, content elided in the source diff)
+            if stream:
+                response_content = ""
+                for chunk in model.chat(
+                    messages,
+                    generate_config={
+                        "max_tokens": int(max_tokens),
+                        "temperature": temperature,
+                        "stream": True,
+                        "lora_name": lora_name,
+                    },
+                ):
+                    assert isinstance(chunk, dict)
+                    delta = chunk["choices"][0]["delta"]
+                    if "content" not in delta:
+                        continue
+                    else:
+                        # some models like deepseek-r1-distill-qwen
+                        # will generate <think>...</think> ...;
+                        # in gradio no output would be rendered,
+                        # thus escape html tags in advance
+                        response_content += html.escape(delta["content"])
+                        yield response_content
+
+                yield response_content
+            else:
+                result = model.chat(
+                    messages,
+                    generate_config={
+                        "max_tokens": int(max_tokens),
+                        "temperature": temperature,
+                        "lora_name": lora_name,
+                    },
+                )
+                yield html.escape(result["choices"][0]["message"]["content"])  # type: ignore

         return gr.ChatInterface(
             fn=generate_wrapper,

@@ -153,7 +165,9 @@ class GradioInterface:
                 gr.Slider(
                     minimum=1,
                     maximum=self.context_length,
-                    value=512
+                    value=512
+                    if "reasoning" not in self.model_ability
+                    else self.context_length // 2,
                     step=1,
                     label="Max Tokens",
                 ),

@@ -161,6 +175,7 @@ class GradioInterface:
                     minimum=0, maximum=2, value=1, step=0.01, label="Temperature"
                 ),
                 gr.Text(label="LoRA Name"),
+                gr.Checkbox(label="Stream", value=True),
             ],
             title=f"🚀 Xinference Chat Bot : {self.model_name} 🚀",
             css="""
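Reviewer note: the html.escape call in the streaming branch above is load-bearing. Gradio renders assistant messages as Markdown/HTML, so a raw <think> tag is parsed as an unknown HTML element and the reasoning text silently disappears. A minimal, self-contained sketch of the same escaping pattern (plain Python, independent of xinference):

import html

# Chunks as a reasoning model might stream them; passed unescaped to a
# Gradio chat component, the <think> tag would swallow the text.
chunks = ["<think>", "compare both options first", "</think>", " Use option B."]

response_content = ""
for piece in chunks:
    # html.escape turns "<think>" into "&lt;think&gt;", which renders as text
    response_content += html.escape(piece)
    print(response_content)  # the real code does `yield response_content` here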
xinference/model/llm/core.py
CHANGED

@@ -25,8 +25,7 @@ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
 from ...core.utils import parse_replica_model_uid
 from ...types import PeftModelConfig
 from ..core import ModelDescription
-from .
-from .reasoning_parsers.abs_reasoning_parsers import ReasoningParserManager
+from .reasoning_parser import ReasoningParser

 if TYPE_CHECKING:
     from .llm_family import LLMFamilyV1, LLMSpecV1

@@ -123,9 +122,7 @@ class LLM(abc.ABC):
     def prepare_parse_reasoning_content(self, reasoning_content):
         # Initialize reasoning parser if model has reasoning ability
         if "reasoning" in self.model_family.model_ability and reasoning_content:
-
-            self.reasoning_parser = ReasoningParserManager.get_parser(module_name)
-            self.reasoning_parser = self.reasoning_parser(
+            self.reasoning_parser = ReasoningParser(
                 self.model_family.reasoning_start_tag,
                 self.model_family.reasoning_end_tag,
             )
xinference/model/llm/llama_cpp/core.py
CHANGED

@@ -43,7 +43,7 @@ class _Sentinel:
     pass


-class XllamaCppModel(LLM):
+class XllamaCppModel(LLM, ChatModelMixin):
    def __init__(
        self,
        model_uid: str,

@@ -83,6 +83,7 @@ class XllamaCppModel(LLM):
            llamacpp_model_config.setdefault("n_gpu_layers", -1)
        elif self._is_linux():
            llamacpp_model_config.setdefault("n_gpu_layers", -1)
+       llamacpp_model_config.setdefault("reasoning_content", False)

        return llamacpp_model_config

@@ -131,6 +132,9 @@ class XllamaCppModel(LLM):

            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")

+       reasoning_content = self._llamacpp_model_config.pop("reasoning_content")
+       self.prepare_parse_reasoning_content(reasoning_content)
+
        if os.path.isfile(self.model_path):
            # mostly passed from --model_path
            model_path = os.path.realpath(self.model_path)

@@ -274,9 +278,11 @@ class XllamaCppModel(LLM):
                while (r := q.get()) is not _Sentinel:
                    yield r

-           return
+           return self._to_chat_completion_chunks(
+               _to_iterator(), self.reasoning_parser
+           )
        else:
-           return q.get()
+           return self._to_chat_completion(q.get(), self.reasoning_parser)


class LlamaCppModel(LLM):
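Note on the reasoning_content flag introduced above: it follows a default-then-pop pattern, getting a default during config sanitization and being popped out again before the dict reaches the llama.cpp backend, presumably so the backend never sees a key it does not recognize. A stand-alone sketch of that pattern (the sanitize function here is a stand-in, not the real method):

def sanitize(llamacpp_model_config: dict) -> dict:
    # mirrors the defaults set in the diff above
    llamacpp_model_config.setdefault("n_gpu_layers", -1)
    llamacpp_model_config.setdefault("reasoning_content", False)
    return llamacpp_model_config

config = sanitize({"n_ctx": 4096})
# consume the flag so only backend-known keys remain in the config
reasoning_content = config.pop("reasoning_content")
assert "reasoning_content" not in config
print(config, reasoning_content)  # {'n_ctx': 4096, 'n_gpu_layers': -1} False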
xinference/model/llm/llm_family.json
CHANGED

@@ -9449,7 +9449,7 @@
     },
     {
         "version": 1,
-        "context_length":
+        "context_length": 131072,
         "model_name": "QwQ-32B",
         "model_lang": [
             "en",

@@ -9496,15 +9496,99 @@
         "model_size_in_billions": 32,
         "quantizations": [
             "fp16",
-            … (8 entries elided in the source)
+            "q2_k",
+            "q3_k_m",
+            "q4_0",
+            "q4_k_m",
+            "q5_0",
+            "q5_k_m",
+            "q6_k",
+            "q8_0"
         ],
+        "quantization_parts": {
+            "fp16": [
+                "00001-of-000017",
+                "00002-of-000017",
+                "00003-of-000017",
+                "00004-of-000017",
+                "00005-of-000017",
+                "00006-of-000017",
+                "00007-of-000017",
+                "00008-of-000017",
+                "00009-of-000017",
+                "00010-of-000017",
+                "00011-of-000017",
+                "00012-of-000017",
+                "00013-of-000017",
+                "00014-of-000017",
+                "00015-of-000017",
+                "00016-of-000017",
+                "00017-of-000017"
+            ],
+            "q2_k": [
+                "00001-of-00004",
+                "00002-of-00004",
+                "00003-of-00004",
+                "00004-of-00004"
+            ],
+            "q3_k_m": [
+                "00001-of-00005",
+                "00002-of-00005",
+                "00003-of-00005",
+                "00004-of-00005",
+                "00005-of-00005"
+            ],
+            "q4_0": [
+                "00001-of-00005",
+                "00002-of-00005",
+                "00003-of-00005",
+                "00004-of-00005",
+                "00005-of-00005"
+            ],
+            "q4_k_m": [
+                "00001-of-00005",
+                "00002-of-00005",
+                "00003-of-00005",
+                "00004-of-00005",
+                "00005-of-00005"
+            ],
+            "q5_0": [
+                "00001-of-00006",
+                "00002-of-00006",
+                "00003-of-00006",
+                "00004-of-00006",
+                "00005-of-00006",
+                "00006-of-00006"
+            ],
+            "q5_k_m": [
+                "00001-of-00006",
+                "00002-of-00006",
+                "00003-of-00006",
+                "00004-of-00006",
+                "00005-of-00006",
+                "00006-of-00006"
+            ],
+            "q6_k": [
+                "00001-of-00007",
+                "00002-of-00007",
+                "00003-of-00007",
+                "00004-of-00007",
+                "00005-of-00007",
+                "00006-of-00007",
+                "00007-of-00007"
+            ],
+            "q8_0": [
+                "00001-of-00009",
+                "00002-of-00009",
+                "00003-of-00009",
+                "00004-of-00009",
+                "00005-of-00009",
+                "00006-of-00009",
+                "00007-of-00009",
+                "00008-of-00009",
+                "00009-of-00009"
+            ]
+        },
         "model_id": "Qwen/QwQ-32B-GGUF",
         "model_file_name_template": "qwq-32b-{quantization}.gguf"
     }
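The new quantization_parts mapping records how the multi-file GGUF quantizations are sharded. A sketch of how a downloader might combine model_file_name_template with a parts list to produce concrete shard names; the "-{part}" suffix convention used here is an assumption for illustration, since this diff only defines the parts themselves:

# Hypothetical shard-name resolver (the real loader may use a separate
# split-file template instead of the "-{part}" suffix assumed here).
template = "qwq-32b-{quantization}.gguf"
quantization_parts = {
    "q4_k_m": [
        "00001-of-00005", "00002-of-00005", "00003-of-00005",
        "00004-of-00005", "00005-of-00005",
    ],
}

def shard_names(quantization: str) -> list:
    base = template.format(quantization=quantization)
    parts = quantization_parts.get(quantization)
    if not parts:
        return [base]  # single-file quantization: no parts listed
    stem, ext = base.rsplit(".", 1)
    return [f"{stem}-{part}.{ext}" for part in parts]

print(shard_names("q4_k_m"))
# ['qwq-32b-q4_k_m-00001-of-00005.gguf', ..., 'qwq-32b-q4_k_m-00005-of-00005.gguf']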
xinference/model/llm/llm_family_modelscope.json
CHANGED

@@ -7217,7 +7217,7 @@
         ],
         "model_id": "AI-ModelScope/QwQ-32B-Preview-GGUF",
         "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf",
-
+        "model_hub": "modelscope"
     }
 ],
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",

@@ -7234,7 +7234,7 @@
     },
     {
         "version": 1,
-        "context_length":
+        "context_length": 131072,
         "model_name": "QwQ-32B",
         "model_lang": [
             "en",

@@ -7284,14 +7284,14 @@
         "model_size_in_billions": 32,
         "quantizations": [
             "fp16",
-            … (8 entries elided in the source)
+            "q2_k",
+            "q3_k_m",
+            "q4_0",
+            "q4_k_m",
+            "q5_0",
+            "q5_k_m",
+            "q6_k",
+            "q8_0"
         ],
         "model_id": "Qwen/QwQ-32B-GGUF",
         "model_file_name_template": "qwq-32b-{quantization}.gguf",
xinference/model/llm/{reasoning_parsers/deepseek_r1_reasoning_parser.py → reasoning_parser.py}
RENAMED

@@ -1,20 +1,17 @@
 import re
 from typing import Optional, Tuple, Union

-from
-from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
+from ...types import ChatCompletionChunkDelta, CompletionChoice


-
-
-@ReasoningParserManager.register_module("deepseek-r1-distill-llama")
-class DeepSeekR1ReasoningParser(ReasoningParser):
-    """Reasoning parser for DeepSeek-R1 model."""
+class ReasoningParser:
+    """Reasoning parser for reasoning models."""

     def __init__(
         self, reasoning_start_tag: str = "<think>", reasoning_end_tag: str = "</think>"
     ):
-
+        self.reasoning_start_tag = reasoning_start_tag
+        self.reasoning_end_tag = reasoning_end_tag
         self.reasoning_regex = re.compile(
             rf"{self.reasoning_start_tag}(.*?){self.reasoning_end_tag}", re.DOTALL
         )
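The regex kept by the renamed class captures everything between the start and end tags. A sketch of the non-streaming extraction it implies, assuming extract_reasoning_content returns a (reasoning_content, content) pair as declared in the abstract base class deleted further below; the method body itself is not shown in this diff:

import re

start, end = "<think>", "</think>"
reasoning_regex = re.compile(rf"{start}(.*?){end}", re.DOTALL)

def extract_reasoning_content(model_output: str):
    # returns (reasoning_content, content); either side may be None
    match = reasoning_regex.search(model_output)
    if match is None:
        return None, model_output
    reasoning = match.group(1)
    content = model_output[match.end():].lstrip()
    return reasoning, content or None

print(extract_reasoning_content("<think>compare A and B</think>\nUse B."))
# ('compare A and B', 'Use B.')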
xinference/model/llm/utils.py
CHANGED

@@ -55,7 +55,7 @@ from .llm_family import (
     _get_cache_dir,
     get_cache_status,
 )
-from .
+from .reasoning_parser import ReasoningParser

 logger = logging.getLogger(__name__)

@@ -250,8 +250,30 @@ class ChatModelMixin:
         reasoning_parser: Optional[ReasoningParser] = None,
         previous_texts: Optional[List[str]] = None,
     ) -> ChatCompletionChunk:
+        choices = chunk.get("choices")
+        if (
+            chunk.get("object") == "chat.completion.chunk"
+            and choices
+            and "delta" in choices[0]
+        ):
+            if reasoning_parser is not None:
+                # process parsing reasoning content
+                assert previous_texts is not None
+                delta = choices[0]["delta"]  # type: ignore
+                if text := delta.get("content"):
+                    current_text = previous_texts[-1] + text
+                    delta = reasoning_parser.extract_reasoning_content_streaming(
+                        previous_text=previous_texts[-1],
+                        current_text=current_text,
+                        delta_text=text,
+                    )
+                    previous_texts[-1] = current_text
+                    choices[0]["delta"] = delta  # type: ignore
+            # Already a ChatCompletionChunk, we don't need to convert chunk.
+            return cast(ChatCompletionChunk, chunk)
+
         choices_list = []
-        for i, choice in enumerate(
+        for i, choice in enumerate(choices):  # type: ignore
             delta = ChatCompletionChunkDelta()
             if "text" in choice and choice["finish_reason"] is None:
                 if reasoning_parser is None:

@@ -345,9 +367,10 @@ class ChatModelMixin:
             if not choices:
                 yield cls._get_final_chat_completion_chunk(chunk)
             else:
-
+                r = cls._to_chat_completion_chunk(
                     chunk, reasoning_parse, previous_texts
                 )
+                yield r

     @classmethod
     def _tools_to_messages_for_deepseek(

@@ -405,6 +428,21 @@ class ChatModelMixin:
     def _to_chat_completion(
         completion: Completion, reasoning_parser: Optional[ReasoningParser] = None
     ) -> ChatCompletion:
+        if completion.get("object") == "chat.completion" and completion.get("choices"):
+            # Already a ChatCompletion
+            if reasoning_parser is not None:
+                for choice in completion["choices"]:
+                    message = choice["message"]  # type: ignore
+                    text = message["content"]
+                    (
+                        reasoning_content,
+                        content,
+                    ) = reasoning_parser.extract_reasoning_content(text)
+                    message["content"] = content
+                    if reasoning_content is not None:
+                        message["reasoning_content"] = reasoning_content
+            return cast(ChatCompletion, completion)
+
         choices = []
         for i, choice in enumerate(completion["choices"]):
             content = choice["text"]
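The early-return branch added to _to_chat_completion_chunk threads previous_texts through the stream so the parser can tell whether a delta is still inside the think block. A compressed, runnable sketch of that bookkeeping with a simplified stand-in parser (start-tag stripping is omitted for brevity; the real parser lives in reasoning_parser.py):

# Stand-in for ReasoningParser.extract_reasoning_content_streaming: routes
# each delta to "reasoning_content" until "</think>" has been seen in full.
def extract_streaming(previous_text: str, current_text: str, delta_text: str) -> dict:
    end = "</think>"
    if end in previous_text:  # already past the think block
        return {"content": delta_text}
    if end in current_text:  # the end tag completes inside this delta
        after = current_text.split(end, 1)[1]
        return {
            "reasoning_content": delta_text[: len(delta_text) - len(after)],
            "content": after,
        }
    return {"reasoning_content": delta_text}

previous_texts = [""]
for delta_text in ["<think>add the", " numbers</think>", "4"]:
    current_text = previous_texts[-1] + delta_text
    delta = extract_streaming(previous_texts[-1], current_text, delta_text)
    previous_texts[-1] = current_text
    print(delta)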
xinference/model/llm/vllm/core.py
CHANGED

@@ -576,6 +576,8 @@ class VLLMModel(LLM):

         sanitized_generate_config = self._sanitize_generate_config(generate_config)
         if self.reasoning_parser:
+            # For reasoning models, the </think> tag may be split into multiple
+            # tokens if the `stop` param is passed, so we pop it from the config.
             sanitized_generate_config.pop("stop")
         logger.debug(
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
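The comment added above is the whole rationale: a stop string such as </think> usually arrives split across several streamed tokens, so per-token matching of user-supplied stop strings misbehaves for reasoning models. A toy illustration in plain Python (not vLLM internals):

# The closing tag arrives split across tokens, so comparing individual
# tokens against stop="</think>" never matches; only the joined text does.
tokens = ["<think>", "check", " both", " cases", "</", "think", ">", " Done."]
stop = "</think>"
print(any(tok == stop for tok in tokens))  # False
print(stop in "".join(tokens))             # True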
{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 1.3.1
+Version: 1.3.1.post1
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye

@@ -292,6 +292,7 @@ potential of cutting-edge AI models.

 ## 🔥 Hot Topics
 ### Framework Enhancements
+- [Xllamacpp](https://github.com/xorbitsai/xllamacpp): new llama.cpp Python binding, maintained by the Xinference team; supports continuous batching and is more production-ready: [#2997](https://github.com/xorbitsai/inference/pull/2997)
 - Distributed inference: running models across workers: [#2877](https://github.com/xorbitsai/inference/pull/2877)
 - VLLM enhancement: Shared KV cache across multiple replicas: [#2732](https://github.com/xorbitsai/inference/pull/2732)
 - Support Continuous batching for Transformers engine: [#1724](https://github.com/xorbitsai/inference/pull/1724)

@@ -299,8 +300,8 @@ potential of cutting-edge AI models.
 - Support specifying worker and GPU indexes for launching models: [#1195](https://github.com/xorbitsai/inference/pull/1195)
 - Support SGLang backend: [#1161](https://github.com/xorbitsai/inference/pull/1161)
 - Support LoRA for LLM and image models: [#1080](https://github.com/xorbitsai/inference/pull/1080)
-- Support speech recognition model: [#929](https://github.com/xorbitsai/inference/pull/929)
 ### New Models
+- Built-in support for [QwQ-32B](https://qwenlm.github.io/blog/qwq-32b/): [#3005](https://github.com/xorbitsai/inference/pull/3005)
 - Built-in support for [DeepSeek V3 and R1](https://github.com/deepseek-ai/DeepSeek-R1): [#2864](https://github.com/xorbitsai/inference/pull/2864)
 - Built-in support for [InternVL2.5](https://internvl.github.io/blog/2024-12-05-InternVL-2.5/): [#2776](https://github.com/xorbitsai/inference/pull/2776)
 - Built-in support for [DeepSeek-R1-Distill-Llama](https://github.com/deepseek-ai/DeepSeek-R1?tab=readme-ov-file#deepseek-r1-distill-models): [#2811](https://github.com/xorbitsai/inference/pull/2811)

@@ -308,7 +309,6 @@ potential of cutting-edge AI models.
 - Built-in support for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M): [#2790](https://github.com/xorbitsai/inference/pull/2790)
 - Built-in support for [qwen2.5-vl](https://github.com/QwenLM/Qwen2.5-VL): [#2788](https://github.com/xorbitsai/inference/pull/2788)
 - Built-in support for [internlm3-instruct](https://github.com/InternLM/InternLM): [#2789](https://github.com/xorbitsai/inference/pull/2789)
-- Built-in support for [MeloTTS](https://github.com/myshell-ai/MeloTTS): [#2760](https://github.com/xorbitsai/inference/pull/2760)
 ### Integrations
 - [Dify](https://docs.dify.ai/advanced/model-configuration/xinference): an LLMOps platform that enables developers (and even non-developers) to quickly build useful applications based on large language models, ensuring they are visual, operable, and improvable.
 - [FastGPT](https://github.com/labring/FastGPT): a knowledge-based platform built on the LLM, offers out-of-the-box data processing and model invocation capabilities, allows for workflow orchestration through Flow visualization.
{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
 xinference/_compat.py,sha256=URSJQLXrcsTO9B_4x0wVDPijYQDhuVJmZ95npID560w,4197
-xinference/_version.py,sha256=
+xinference/_version.py,sha256=qMz600g9USAjrV1nTxM3bBcpOiTBWs1VJPEtdMggVGg,503
 xinference/conftest.py,sha256=ZB7li77s4_H4ZEQpDo2PX-b4zrs8-bIpvh59P_CaSoo,9691
 xinference/constants.py,sha256=mEW4HDzjXtDXN61Mt6TtJrJ4ljbB6VUkh97e3oDbNx4,3905
 xinference/device_utils.py,sha256=ELsqvnjvz9wYthTyQFzKSV4mZsaASz6hj_IsfMmfMWc,4447

@@ -21,7 +21,7 @@ xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7
 xinference/client/restful/restful_client.py,sha256=DofFF0ZaOmBpCVp9qtAeYDGbvd-KS5u4_GMGp8AbbM4,53994
 xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/core/cache_tracker.py,sha256=3ubjYCU5aZToSp2GEuzedECVrg-PR4kThTefrFUkb9g,6971
-xinference/core/chat_interface.py,sha256=
+xinference/core/chat_interface.py,sha256=X5ZC91M_uKIG8NW1xupKUDNoqzUHMpLp4-ijf-YhjbE,21766
 xinference/core/event.py,sha256=42F38H2WOl6aPxp2oxX6WNxHRRxbnvYRmbt4Ar7NP4U,1640
 xinference/core/image_interface.py,sha256=5Iuoiw3g2TvgOYi3gRIAGApve2nNzfMPduRrBHvd1NY,13755
 xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631

@@ -87,23 +87,21 @@ xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17k
 xinference/model/image/stable_diffusion/core.py,sha256=V3BaASwx8q1YERb4jhhaYEDFiwh3BuPAz8pVZTuktAQ,24717
 xinference/model/image/stable_diffusion/mlx.py,sha256=GZsozzGB04NfHAdU9MI6gwWE1t_A-s_Ddn_ic8DlkKQ,7476
 xinference/model/llm/__init__.py,sha256=UJOSz9zr5mAj8Fm09yoZbEe4xBWYnSxUV9aGE50e5dc,14184
-xinference/model/llm/core.py,sha256=
-xinference/model/llm/llm_family.json,sha256=
+xinference/model/llm/core.py,sha256=2AYRKdiJ5L1iKU9CE_C09IbEtE2KrsIy4dqkqg2txes,8626
+xinference/model/llm/llm_family.json,sha256=xszAQbwI5lvkdcxRSoowNddvPfuFF2aT6xnvzeyzo8w,374447
 xinference/model/llm/llm_family.py,sha256=SrgTmEKspAELhVqmMs7Rz6xUk7rmc9V61urvbWAZOVE,39214
 xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4IPNq1KQ-8IDPQQA,8759
-xinference/model/llm/llm_family_modelscope.json,sha256=
+xinference/model/llm/llm_family_modelscope.json,sha256=2OuPertAGKnryliUofjnqemLrhHW1aaq6-4tPsrbKNI,304592
 xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
 xinference/model/llm/memory.py,sha256=GLNmXBI-AtMbuaJfEf50fnhN4rdbOZjLyT6L_Vjqa5g,10206
-xinference/model/llm/
+xinference/model/llm/reasoning_parser.py,sha256=Gqf2WS5olkD2SfJ3wlCAQsmQUZOIc1185h_01pIs7TQ,6067
+xinference/model/llm/utils.py,sha256=QYYGfaPYr3GG7dyowNgs4qyayid-gOtJ_ToXqAMVhSU,32846
 xinference/model/llm/llama_cpp/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/llama_cpp/core.py,sha256=
+xinference/model/llm/llama_cpp/core.py,sha256=rUVyaP5tE6xz9jy0m4iZ0ys99vl3sicnWZsEnbaXPfw,21181
 xinference/model/llm/lmdeploy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xinference/model/llm/lmdeploy/core.py,sha256=WvSP3x6t-HBv6hKh1qWZatFAzlcZCyyKqvc3ua8yPTI,19835
 xinference/model/llm/mlx/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/mlx/core.py,sha256=l4_MKw5UckM81kaCwgriy0KZU3zPN38p36P3J9USmgA,23568
-xinference/model/llm/reasoning_parsers/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
-xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py,sha256=YtOOVbSl6fLugn3vmzo_AQbbjl6H5kX9DPpP9KP3gnY,3004
-xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py,sha256=HmnAsNcoeUpyUSTNF0j_0Z4Am7OKiGrJnhNj-BhtQf0,6323
 xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
 xinference/model/llm/sglang/core.py,sha256=tMbvQOwQu5uBXBTMK5Vh-FR2Gc-Nbc0HIhp2iy47wCA,20606
 xinference/model/llm/transformers/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581

@@ -130,7 +128,7 @@ xinference/model/llm/transformers/tensorizer_utils.py,sha256=VXSYbPZtCbd8lVvsnjD
 xinference/model/llm/transformers/utils.py,sha256=KETjuVR_RpF--fno0KxT068fD1v4REFhe-0wy_sCwRs,19584
 xinference/model/llm/transformers/yi_vl.py,sha256=iCdRLw-wizbU-qXXc8CT4DhC0Pt-uYg0vFwXEhAZjQg,8961
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=
+xinference/model/llm/vllm/core.py,sha256=j5sdlrctBnouLJAfBs0Ofa1JbngTlYsDzrs2ManQN0o,38261
 xinference/model/llm/vllm/utils.py,sha256=LKOmwfFRrlSecawxT-uE39tC2RQbf1UIiSH9Uz90X6w,1313
 xinference/model/llm/vllm/xavier/__init__.py,sha256=CyLLkbImZouAk4lePIgKXT4WQoqyauIEwdqea5IOUVU,581
 xinference/model/llm/vllm/xavier/allocator.py,sha256=SJ2eCOxF6CWTBZIP39FRxeK6fxIE8pRshOPnSRc72d4,2691

@@ -15726,9 +15724,9 @@ xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
 xinference/web/ui/src/locales/en.json,sha256=5MN-GKLcPOeUAsDbv_MRvD4uf86WsvUC6rhzTAtQevA,8925
 xinference/web/ui/src/locales/zh.json,sha256=27HeH4Qc96KuDJ9cgb4OybpUFG-GuuUiaD0ASsg3lyQ,8666
-xinference-1.3.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-1.3.1.dist-info/METADATA,sha256=
-xinference-1.3.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-xinference-1.3.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-1.3.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-1.3.1.dist-info/RECORD,,
+xinference-1.3.1.post1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-1.3.1.post1.dist-info/METADATA,sha256=aag4egJ0PONQQ2mu6fWiY9KDHMpqqTzSTHAOPO11vb8,24447
+xinference-1.3.1.post1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+xinference-1.3.1.post1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-1.3.1.post1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-1.3.1.post1.dist-info/RECORD,,
xinference/model/llm/reasoning_parsers/__init__.py
DELETED

@@ -1,13 +0,0 @@
-# Copyright 2022-2024 XProbe Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py
DELETED

@@ -1,98 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Dict, Optional, Tuple, Type, Union
-
-from ....types import ChatCompletionChunkDelta, CompletionChoice
-
-
-class ReasoningParser(ABC):
-    """Abstract base class for reasoning content parsers."""
-
-    def __init__(
-        self,
-        reasoning_start_tag: str = "<think>",
-        reasoning_end_tag: str = "</think>",
-    ):
-        """Initialize the reasoning parser.
-
-        Args:
-            reasoning_start_tag (str, optional): Start tag for reasoning content. Defaults to "<think>".
-            reasoning_end_tag (str, optional): End tag for reasoning content. Defaults to "</think>".
-        """
-        self.reasoning_start_tag = reasoning_start_tag
-        self.reasoning_end_tag = reasoning_end_tag
-
-    @abstractmethod
-    def extract_reasoning_content_streaming(
-        self,
-        previous_text: str,
-        current_text: str,
-        delta_text: str,
-    ) -> ChatCompletionChunkDelta:
-        """Extract reasoning content from model output in a streaming fashion.
-
-        Args:
-            content (str): The model output content to parse.
-
-        Yields:
-            str: Extracted reasoning content chunks.
-        """
-        pass
-
-    @abstractmethod
-    def extract_reasoning_content(
-        self, model_output: Union[str, CompletionChoice]
-    ) -> Tuple[Optional[str], Optional[str]]:
-        """Extract reasoning content from model output.
-
-        Args:
-            content (str): The model output content to parse.
-
-        Returns:
-            Optional[str]: Extracted reasoning content, or None if no reasoning content found.
-        """
-        pass
-
-
-class ReasoningParserManager:
-    """Manager class for reasoning parsers."""
-
-    _parsers: Dict[str, Type[ReasoningParser]] = {}
-
-    @classmethod
-    def register(cls, model_name: str, parser_cls: Type[ReasoningParser]) -> None:
-        """Register a reasoning parser for a specific model.
-
-        Args:
-            model_name (str): The name of the model.
-            parser_cls (Type[ReasoningParser]): The parser class to register.
-        """
-        cls._parsers[model_name] = parser_cls
-
-    @classmethod
-    def register_module(cls, model_name: str):
-        """Decorator for registering a reasoning parser for a specific model.
-
-        Args:
-            model_name (str): The name of the model.
-
-        Returns:
-            Callable: The decorator function.
-        """
-
-        def _register(parser_cls: Type[ReasoningParser]) -> Type[ReasoningParser]:
-            cls.register(model_name, parser_cls)
-            return parser_cls
-
-        return _register
-
-    @classmethod
-    def get_parser(cls, model_name: str) -> Optional[Type[ReasoningParser]]:
-        """Get the registered parser for a specific model.
-
-        Args:
-            model_name (str): The name of the model.
-
-        Returns:
-            Optional[Type[ReasoningParser]]: The registered parser class, or None if not found.
-        """
-        return cls._parsers.get(model_name)
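Net effect of the two files deleted above: the abstract base class and the ReasoningParserManager registry collapse into the single concrete ReasoningParser from the rename earlier in this diff, so callers construct the parser directly instead of looking it up by model name. A minimal replica of both shapes for contrast (stand-in class, not the real modules):

from typing import Dict, Type

class ReasoningParser:  # stand-in with the same constructor shape as the renamed class
    def __init__(self, reasoning_start_tag: str = "<think>", reasoning_end_tag: str = "</think>"):
        self.reasoning_start_tag = reasoning_start_tag
        self.reasoning_end_tag = reasoning_end_tag

# Before: look the parser class up in a registry keyed by model name.
_parsers: Dict[str, Type[ReasoningParser]] = {"deepseek-r1-distill-llama": ReasoningParser}
parser = _parsers["deepseek-r1-distill-llama"]("<think>", "</think>")

# After: construct the one concrete class directly from the model family's tags.
parser = ReasoningParser("<think>", "</think>")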
{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/LICENSE
File without changes

{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/WHEEL
File without changes

{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/entry_points.txt
File without changes

{xinference-1.3.1.dist-info → xinference-1.3.1.post1.dist-info}/top_level.txt
File without changes