xinference 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/scheduler.py +4 -7
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +306 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +307 -3
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +111 -1
- xinference/model/llm/vllm/core.py +13 -2
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/METADATA +9 -8
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/RECORD +26 -25
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-
+ "date": "2025-02-08T17:06:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "1.2.1"
+ "full-revisionid": "ac97a13a831de6debda52e6fdb8c1bf9366be57c",
+ "version": "1.2.2"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -2000,25 +2000,22 @@ class RESTfulAPI(CancelMixin):
 
         from ..model.llm.utils import (
             GLM4_TOOL_CALL_FAMILY,
-            LLAMA3_TOOL_CALL_FAMILY,
             QWEN_TOOL_CALL_FAMILY,
+            TOOL_CALL_FAMILY,
         )
 
         model_family = desc.get("model_family", "")
-        function_call_models = (
-            QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY + LLAMA3_TOOL_CALL_FAMILY
-        )
 
-        if model_family not in function_call_models:
+        if model_family not in TOOL_CALL_FAMILY:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool calls",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool calls",
                 )
             if has_tool_message:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool messages",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool messages",
                 )
         if body.tools and body.stream:
             is_vllm = await model.is_vllm_backend()
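The guard now consults one aggregated constant instead of summing family lists at the call site; TOOL_CALL_FAMILY is exported from xinference/model/llm/utils.py (extended in this release, per the +111 -1 entry in the file list). A minimal sketch of the pattern, with hypothetical member values since the real lists live in utils.py:

```python
# Hypothetical member values; only the constant names appear in this diff.
QWEN_TOOL_CALL_FAMILY = ["qwen2.5-instruct"]
GLM4_TOOL_CALL_FAMILY = ["glm4-chat"]
LLAMA3_TOOL_CALL_FAMILY = ["llama-3.1-instruct"]
DEEPSEEK_TOOL_CALL_FAMILY = ["deepseek-v3"]

# One aggregated list, so endpoints check membership in a single place.
TOOL_CALL_FAMILY = (
    QWEN_TOOL_CALL_FAMILY
    + GLM4_TOOL_CALL_FAMILY
    + LLAMA3_TOOL_CALL_FAMILY
    + DEEPSEEK_TOOL_CALL_FAMILY
)

def rejects_tools(model_family: str, has_tools: bool) -> bool:
    # Mirrors the endpoint's guard: tool calls are only allowed for
    # families known to support them.
    return has_tools and model_family not in TOOL_CALL_FAMILY
```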
xinference/client/handlers.py
CHANGED
xinference/core/scheduler.py
CHANGED
@@ -269,16 +269,13 @@ class InferenceRequest:
         )
 
 
-def _get_valid_batch_kv_cache(
-    from transformers.cache_utils import DynamicCache
-
-    cache = DynamicCache.from_legacy_cache(data)
+def _get_valid_batch_kv_cache(cache, skipped_indexes: Set[int]):
     batch_size = cache.key_cache[0].shape[0]
     batch_slices = [num for num in range(batch_size) if num not in skipped_indexes]
     for idx in range(len(cache)):
-        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::]
-        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::]
-    return cache
+        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::].contiguous()
+        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::].contiguous()
+    return cache
 
 
 class SchedulerActor(xo.StatelessActor):
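The rewritten helper receives the cache object directly (rather than rebuilding a DynamicCache from legacy data) and forces contiguous tensors after slicing out finished requests. A standalone sketch of the slicing pattern; shapes and the finished-request set are made up for illustration:

```python
import torch

# Two layers of [batch, heads, seq_len, head_dim] key tensors, standing in
# for DynamicCache.key_cache; value tensors would be trimmed identically.
key_cache = [torch.randn(4, 2, 8, 16) for _ in range(2)]
skipped_indexes = {1, 3}  # batch rows whose requests have finished

batch_size = key_cache[0].shape[0]
batch_slices = [n for n in range(batch_size) if n not in skipped_indexes]

for idx in range(len(key_cache)):
    # Advanced indexing copies the kept rows; .contiguous() additionally
    # guarantees a densely laid-out buffer for downstream kernels.
    key_cache[idx] = key_cache[idx][batch_slices, ::].contiguous()

print(key_cache[0].shape)  # torch.Size([2, 2, 8, 16])
```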
xinference/deploy/local.py
CHANGED
xinference/model/audio/core.py
CHANGED
@@ -25,6 +25,7 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .kokoro import KokoroModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -176,6 +177,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ],
     AudioModelDescription,
 ]:
@@ -192,6 +194,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -212,6 +215,8 @@
         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MeloTTS":
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro":
+        model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
xinference/model/audio/kokoro.py
ADDED
@@ -0,0 +1,139 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class KokoroModel:
+    # The available voices, should keep sync with https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+    VOICES = [
+        "af_alloy",
+        "af_aoede",
+        "af_bella",
+        "af_jessica",
+        "af_kore",
+        "af_nicole",
+        "af_nova",
+        "af_river",
+        "af_sarah",
+        "af_sky",
+        "am_adam",
+        "am_echo",
+        "am_eric",
+        "am_fenrir",
+        "am_liam",
+        "am_michael",
+        "am_onyx",
+        "am_puck",
+        "bf_alice",
+        "bf_emma",
+        "bf_isabella",
+        "bf_lily",
+        "bm_daniel",
+        "bm_fable",
+        "bm_george",
+        "bm_lewis",
+    ]
+
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+
+        from kokoro import KModel, KPipeline
+
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
+        # LANG_CODES = dict(
+        #     a='American English',
+        #     b='British English',
+        # )
+        lang_code = self._kwargs.get("lang_code", "a")
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path),
+            device=self._device,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        assert self._model is not None
+        if not voice:
+            voice = next(iter(self.VOICES))
+            logger.info("Auto select speaker: %s", voice)
+        elif not voice.endswith(".pt") and voice not in self.VOICES:
+            raise ValueError(
+                f"Invalid voice: {voice}, available speakers: {self.VOICES}"
+            )
+        else:
+            logger.info("Using custom voice pt: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        results = list(generator)
+        audio = np.concatenate([r[2] for r in results])
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                24000,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
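A hypothetical smoke test of the new class; the model directory is a placeholder, the spec handling is simplified, and the `kokoro` and `soundfile` packages must be installed:

```python
from xinference.model.audio.kokoro import KokoroModel

# model_path must contain config.json and kokoro-v1_0.pth, as load() expects.
model = KokoroModel(
    model_uid="kokoro-demo",
    model_path="/path/to/Kokoro-82M",
    model_spec=None,  # real callers pass an AudioModelFamilyV1 instance
)
model.load()  # auto-selects a device when none was given

wav_bytes = model.speech(
    "Hello from Kokoro!", voice="af_bella", response_format="wav"
)
with open("hello.wav", "wb") as f:
    f.write(wav_bytes)
```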
xinference/model/audio/model_spec.json
CHANGED
@@ -338,5 +338,13 @@
     "model_ability": "text-to-audio",
     "multilingual": false,
     "language": "KR"
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_id": "hexgrad/Kokoro-82M",
+    "model_revision": "7a29fcdf8e997bac6d6f5f6f0c2f0b92912f6102",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/audio/model_spec_modelscope.json
CHANGED
@@ -100,5 +100,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/Kokoro-82M",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -28,7 +28,7 @@ from ....types import (
 )
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
@@ -123,18 +123,22 @@ class LlamaCppModel(LLM):
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-
-
-        os.path.
-
-
-
-
+        if os.path.isfile(self.model_path):
+            # mostly passed from --model_path
+            model_path = os.path.realpath(self.model_path)
+        else:
+            # handle legacy cache.
+            model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
             )
-
-
-
-        model_path = legacy_model_file_path
+        legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+        if os.path.exists(legacy_model_file_path):
+            model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
@@ -272,8 +276,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
-        if tools and model_family in QWEN_TOOL_CALL_FAMILY:
-            full_context_kwargs["tools"] = tools
+        if tools:
+            if model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
+                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
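Read as a whole, the new lookup tries a direct file path first, then the templated quantization file, then the legacy model.bin cache. A standalone sketch of that order (function name and parameters are illustrative, not part of the package):

```python
import os

def resolve_gguf_path(model_path: str, file_template: str, quantization: str) -> str:
    # 1. A file path (e.g. passed via --model_path) is used as-is.
    if os.path.isfile(model_path):
        return os.path.realpath(model_path)
    # 2. Otherwise look for the templated quantization file in the directory.
    candidate = os.path.realpath(
        os.path.join(model_path, file_template.format(quantization=quantization))
    )
    # 3. A legacy model.bin, if present, wins over the templated name.
    legacy = os.path.join(model_path, "model.bin")
    return legacy if os.path.exists(legacy) else candidate
```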
xinference/model/llm/llm_family.json
CHANGED
@@ -7125,6 +7125,91 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version":1,
+    "context_length":128000,
+    "model_name":"qwen2.5-vl-instruct",
+    "model_lang":[
+      "en",
+      "zh"
+    ],
+    "model_ability":[
+      "chat",
+      "vision"
+    ],
+    "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+    "model_specs":[
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+      },
+      {
+        "model_format":"pytorch",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "none"
+        ],
+        "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":3,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":72,
+        "quantizations":[
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
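The chat_template fields in these entries are Jinja2 templates rendered over OpenAI-style message lists. A toy rendering with a deliberately simplified template (the real one above also tracks image and video placeholders):

```python
from jinja2 import Template

# Simplified stand-in for the qwen2.5-vl-instruct template above.
toy = Template(
    "{% for m in messages %}<|im_start|>{{ m['role'] }}\n"
    "{{ m['content'] }}<|im_end|>\n{% endfor %}"
    "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
)
print(toy.render(messages=[{"role": "user", "content": "hi"}], add_generation_prompt=True))
```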
@@ -7212,7 +7297,7 @@
       "zh"
     ],
     "model_ability":[
-      "chat",
+      "generate",
       "audio"
     ],
     "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -8937,6 +9022,151 @@
       "<|end▁of▁sentence|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "deepseek-r1-distill-llama",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "F16"
+        ],
+        "quantization_parts": {
+          "Q6_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "F16": [
+            "00001-of-00003",
+            "00002-of-00003",
+            "00003-of-00003"
+          ]
+        },
+        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+        "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+        "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 8192,
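The 70B GGUF spec above pairs quantization_parts with model_file_name_split_template for sharded files. A sketch of how a loader could expand them into concrete shard names (the values are copied from the hunk; the helper itself is illustrative):

```python
SPLIT_TEMPLATE = (
    "DeepSeek-R1-Distill-Llama-70B-{quantization}/"
    "DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
)
QUANTIZATION_PARTS = {
    "Q6_K": ["00001-of-00002", "00002-of-00002"],
    "Q8_0": ["00001-of-00002", "00002-of-00002"],
    "F16": ["00001-of-00003", "00002-of-00003", "00003-of-00003"],
}

def shard_files(quantization: str) -> list:
    # Multi-part quantizations expand via the split template; single-file
    # ones would fall back to model_file_name_template instead.
    parts = QUANTIZATION_PARTS.get(quantization, [])
    return [SPLIT_TEMPLATE.format(quantization=quantization, part=p) for p in parts]

print(shard_files("Q6_K")[0])
# DeepSeek-R1-Distill-Llama-70B-Q6_K/DeepSeek-R1-Distill-Llama-70B-Q6_K-00001-of-00002.gguf
```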
@@ -9306,5 +9536,80 @@
       "<|user|>",
       "<|observation|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm3-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0"
+        ],
+        "model_id": "internlm/internlm3-8b-instruct-gguf",
+        "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+      },
+      {
+        "model_format":"mlx",
+        "model_size_in_billions":8,
+        "quantizations":[
+          "4bit"
+        ],
+        "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+      }
+    ],
+    "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      2,
+      128131
+    ],
+    "stop": [
+      "</s>",
+      "<|im_end|>"
+    ]
   }
 ]