xinference 1.3.1.post1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry.
- xinference/_version.py +3 -3
- xinference/core/chat_interface.py +1 -1
- xinference/model/llm/__init__.py +3 -0
- xinference/model/llm/llama_cpp/core.py +44 -14
- xinference/model/llm/llm_family.json +271 -12
- xinference/model/llm/llm_family_modelscope.json +248 -13
- xinference/model/llm/mlx/core.py +15 -11
- xinference/model/llm/reasoning_parser.py +14 -6
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +3 -2
- xinference/model/llm/transformers/gemma3.py +185 -0
- xinference/model/llm/transformers/intern_vl.py +0 -2
- xinference/model/llm/utils.py +37 -29
- xinference/model/llm/vllm/core.py +8 -3
- xinference/types.py +2 -2
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
- xinference/web/ui/build/static/js/main.3cea968e.js +3 -0
- xinference/web/ui/build/static/js/main.3cea968e.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -2
- xinference/web/ui/src/locales/zh.json +1 -1
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/METADATA +1 -1
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/RECORD +34 -33
- xinference/web/ui/build/static/css/main.f8177338.css +0 -2
- xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
- xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
- xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
- /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.3cea968e.js.LICENSE.txt} +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/LICENSE +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/WHEEL +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2025-03-…",
+ "date": "2025-03-21T14:33:52+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "…",
- "version": "1.3.1.post1"
+ "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
+ "version": "1.4.0"
 }
 ''' # END VERSION_JSON
xinference/core/chat_interface.py
CHANGED
@@ -137,7 +137,7 @@ class GradioInterface:
         ):
             assert isinstance(chunk, dict)
             delta = chunk["choices"][0]["delta"]
-            if "content" not in delta:
+            if "content" not in delta or delta["content"] is None:
                 continue
             else:
                 # some model like deepseek-r1-distill-qwen
xinference/model/llm/__init__.py
CHANGED
@@ -143,6 +143,7 @@ def _install():
         DeepSeekV2PytorchModel,
     )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
+    from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
     from .transformers.intern_vl import InternVLChatModel
@@ -198,6 +199,8 @@ def _install():
             OptPytorchModel,
             GlmEdgeVModel,
             CogAgentChatModel,
+            Gemma3TextChatModel,
+            Gemma3ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -39,10 +39,15 @@ logger = logging.getLogger(__name__)
 USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))


-class …
+class _Done:
     pass


+class _Error:
+    def __init__(self, msg):
+        self.msg = msg
+
+
 class XllamaCppModel(LLM, ChatModelMixin):
     def __init__(
         self,
@@ -200,7 +205,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
         )
         prompt_json = orjson.dumps(data)

-        def …
+        def _error_callback(err):
+            try:
+                msg = orjson.loads(err)
+                q.put(_Error(msg))
+            except Exception as e:
+                q.put(_Error(str(e)))
+
+        def _ok_callback(ok):
             try:
                 res = orjson.loads(ok)
                 res["model"] = self.model_uid
@@ -209,10 +221,10 @@ class XllamaCppModel(LLM, ChatModelMixin):
                 logger.exception("handle_completions callback failed: %s", e)

             try:
-                self._llm.handle_completions(prompt_json, …
+                self._llm.handle_completions(prompt_json, _error_callback, _ok_callback)
             except Exception as ex:
                 logger.exception("handle_completions failed: %s", ex)
-            q.put(…
+            q.put(_Done)

         assert self._executor
         self._executor.submit(_handle_completion)
@@ -220,12 +232,17 @@ class XllamaCppModel(LLM, ChatModelMixin):
         if stream:

             def _to_iterator():
-                while (r := q.get()) is not …:
+                while (r := q.get()) is not _Done:
+                    if type(r) is _Error:
+                        raise Exception("Got error in generate stream: %s", r.msg)
                     yield r

             return _to_iterator()
         else:
-            …
+            r = q.get()
+            if type(r) is _Error:
+                raise Exception("Got error in generate: %s", r.msg)
+            return r

     def chat(
         self,
@@ -253,7 +270,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
         )
         prompt_json = orjson.dumps(data)

-        def …
+        def _error_callback(err):
+            try:
+                msg = orjson.loads(err)
+                q.put(_Error(msg))
+            except Exception as e:
+                q.put(_Error(str(e)))
+
+        def _ok_callback(ok):
             try:
                 res = orjson.loads(ok)
                 res["model"] = self.model_uid
@@ -263,11 +287,11 @@ class XllamaCppModel(LLM, ChatModelMixin):

             try:
                 self._llm.handle_chat_completions(
-                    prompt_json, …
+                    prompt_json, _error_callback, _ok_callback
                 )
             except Exception as ex:
                 logger.exception("handle_chat_completions failed: %s", ex)
-            q.put(…
+            q.put(_Done)

         assert self._executor
         self._executor.submit(_handle_chat_completion)
@@ -275,14 +299,19 @@ class XllamaCppModel(LLM, ChatModelMixin):
         if stream:

             def _to_iterator():
-                while (r := q.get()) is not …:
+                while (r := q.get()) is not _Done:
+                    if type(r) is _Error:
+                        raise Exception("Got error in chat stream: %s", r.msg)
                     yield r

             return self._to_chat_completion_chunks(
                 _to_iterator(), self.reasoning_parser
             )
         else:
-            …
+            r = q.get()
+            if type(r) is _Error:
+                raise Exception("Got error in chat: %s", r.msg)
+            return self._to_chat_completion(r, self.reasoning_parser)


 class LlamaCppModel(LLM):
@@ -533,10 +562,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
         if tools:
-            if …
+            if (
+                model_family in QWEN_TOOL_CALL_FAMILY
+                or model_family in DEEPSEEK_TOOL_CALL_FAMILY
+            ):
                 full_context_kwargs["tools"] = tools
-            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
-                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
xinference/model/llm/llm_family.json
CHANGED
@@ -5786,6 +5786,265 @@
       "<start_of_turn>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "gemma-3-1b-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-1b-it"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+      }
+    ],
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+    "stop_token_ids": [
+      1,
+      105,
+      106
+    ],
+    "stop": [
+      "<eos>",
+      "<end_of_turn>",
+      "<start_of_turn>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "gemma-3-it",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-4b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-12b-it"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none",
+          "4-bit",
+          "8-bit"
+        ],
+        "model_id": "google/gemma-3-27b-it"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "IQ3_XS",
+          "IQ3_XXS",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "bf16"
+        ],
+        "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+        "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "fp16"
+        ],
+        "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+      }
+    ],
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+    "stop_token_ids": [
+      1,
+      105,
+      106
+    ],
+    "stop": [
+      "<eos>",
+      "<end_of_turn>",
+      "<start_of_turn>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
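With these registry entries in place, the new Gemma 3 models should be launchable like any other built-in family. A rough sketch using the Python client, assuming a locally running supervisor on the default port (the endpoint and the chosen spec below are illustrative, not prescribed by this release):

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumes `xinference-local` is already running

model_uid = client.launch_model(
    model_name="gemma-3-it",            # entry added in this release
    model_format="pytorch",
    model_size_in_billions=4,
    quantization="none",
)

model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Hello"}]))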
@@ -6923,7 +7182,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
       },
       {
         "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
       },
       {
         "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
       },
       {
         "model_format": "awq",
@@ -6961,7 +7220,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
       },
       {
         "model_format": "awq",
@@ -6969,7 +7228,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
       },
       {
         "model_format": "awq",
@@ -6987,7 +7246,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
       },
       {
         "model_format": "awq",
@@ -7005,7 +7264,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
       },
       {
         "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
           "8-bit",
           "none"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO…"
+        "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
       },
       {
         "model_format": "awq",
@@ -7023,7 +7282,7 @@
         "quantizations": [
           "Int4"
         ],
-        "model_id": "OpenGVLab/InternVL2_5-MPO-…"
+        "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
       }
     ],
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7892,7 +8151,7 @@
         "model_id": "mlx-community/DeepSeek-V3-{quantization}"
       }
     ],
-    "chat_template": "{% if …",
"chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
     "stop_token_ids": [
       1
     ],
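The reworked DeepSeek-V3 template iterates over message.tool_calls and over role == "tool" entries, so the conversations it renders look roughly like the following (the function name and arguments are hypothetical):

# Hypothetical message list in the shape the new DeepSeek-V3 chat template consumes.
messages = [
    {"role": "user", "content": "What is the weather in Beijing?"},
    {
        "role": "assistant",
        "tool_calls": [
            {"function": {"name": "get_weather", "arguments": "{\"city\": \"Beijing\"}"}}
        ],
    },
    {"role": "tool", "content": "{\"temperature\": 20}"},
]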