xinference 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2025-01-24T16:52:57+0800",
+ "date": "2025-02-08T17:06:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "a57b99b07b40d1082f69a8fc5b968d56bc3636bc",
- "version": "1.2.1"
+ "full-revisionid": "ac97a13a831de6debda52e6fdb8c1bf9366be57c",
+ "version": "1.2.2"
 }
 ''' # END VERSION_JSON
 
@@ -2000,25 +2000,22 @@ class RESTfulAPI(CancelMixin):
 
         from ..model.llm.utils import (
             GLM4_TOOL_CALL_FAMILY,
-            LLAMA3_TOOL_CALL_FAMILY,
             QWEN_TOOL_CALL_FAMILY,
+            TOOL_CALL_FAMILY,
         )
 
         model_family = desc.get("model_family", "")
-        function_call_models = (
-            QWEN_TOOL_CALL_FAMILY + GLM4_TOOL_CALL_FAMILY + LLAMA3_TOOL_CALL_FAMILY
-        )
 
-        if model_family not in function_call_models:
+        if model_family not in TOOL_CALL_FAMILY:
             if body.tools:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool calls",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool calls",
                 )
             if has_tool_message:
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Only {function_call_models} support tool messages",
+                    detail=f"Only {TOOL_CALL_FAMILY} support tool messages",
                 )
         if body.tools and body.stream:
             is_vllm = await model.is_vllm_backend()
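The tool-call gate above now consults a single TOOL_CALL_FAMILY list rather than concatenating the per-vendor lists. A minimal sketch of how the check surfaces to a caller, assuming a local Xinference server and a hypothetical model uid whose family is not in a tool-call family:

import requests

# Passing "tools" to a model whose family is outside TOOL_CALL_FAMILY is expected
# to come back as HTTP 400, with a detail message listing the supported families.
resp = requests.post(
    "http://localhost:9997/v1/chat/completions",
    json={
        "model": "my-non-tool-model",  # hypothetical model uid
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [
            {
                "type": "function",
                "function": {"name": "get_weather", "parameters": {"type": "object", "properties": {}}},
            }
        ],
    },
)
print(resp.status_code, resp.text)  # expected: 400 plus the TOOL_CALL_FAMILY detail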
@@ -13,3 +13,6 @@ from .restful.restful_client import ( # noqa: F401
 from .restful.restful_client import (  # noqa: F401
     RESTfulImageModelHandle as ImageModelHandle,
 )
+from .restful.restful_client import (  # noqa: F401
+    RESTfulVideoModelHandle as VideoModelHandle,
+)
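After this change the video handle is re-exported under the same alias pattern as the other handles; a one-line sketch of the new import surface:

from xinference.client import VideoModelHandle  # alias of RESTfulVideoModelHandle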
@@ -269,16 +269,13 @@ class InferenceRequest:
     )
 
 
-def _get_valid_batch_kv_cache(data, skipped_indexes: Set[int]):
-    from transformers.cache_utils import DynamicCache
-
-    cache = DynamicCache.from_legacy_cache(data)
+def _get_valid_batch_kv_cache(cache, skipped_indexes: Set[int]):
     batch_size = cache.key_cache[0].shape[0]
     batch_slices = [num for num in range(batch_size) if num not in skipped_indexes]
     for idx in range(len(cache)):
-        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::]
-        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::]
-    return cache.to_legacy_cache()
+        cache.key_cache[idx] = cache.key_cache[idx][batch_slices, ::].contiguous()
+        cache.value_cache[idx] = cache.value_cache[idx][batch_slices, ::].contiguous()
+    return cache
 
 
 class SchedulerActor(xo.StatelessActor):
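The helper now receives a transformers DynamicCache directly and trims it in place, making each sliced tensor contiguous, instead of round-tripping through the legacy tuple format. A standalone sketch of the same trimming logic with illustrative shapes (torch and transformers assumed installed):

import torch
from transformers.cache_utils import DynamicCache

# Toy cache: 1 layer, batch of 4, 2 heads, seq len 5, head dim 8.
cache = DynamicCache()
cache.update(torch.randn(4, 2, 5, 8), torch.randn(4, 2, 5, 8), layer_idx=0)

skipped_indexes = {1, 3}  # finished requests to evict from the batch
keep = [i for i in range(cache.key_cache[0].shape[0]) if i not in skipped_indexes]
for idx in range(len(cache)):
    cache.key_cache[idx] = cache.key_cache[idx][keep, ::].contiguous()
    cache.value_cache[idx] = cache.value_cache[idx][keep, ::].contiguous()

print(cache.key_cache[0].shape)  # torch.Size([2, 2, 5, 8])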
@@ -41,7 +41,8 @@ async def _start_local_cluster(
 ):
     from .utils import create_worker_actor_pool
 
-    logging.config.dictConfig(logging_conf)  # type: ignore
+    if logging_conf:
+        logging.config.dictConfig(logging_conf)  # type: ignore
 
     pool = None
     try:
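The new guard matters because logging.config.dictConfig does not accept an empty or missing configuration; it raises instead of acting as a no-op. A quick illustration:

import logging.config

try:
    logging.config.dictConfig({})  # stand-in for a missing logging_conf
except ValueError as e:
    print(e)  # "dictionary doesn't specify a version"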
@@ -25,6 +25,7 @@ from .f5tts import F5TTSModel
 from .f5tts_mlx import F5TTSMLXModel
 from .fish_speech import FishSpeechModel
 from .funasr import FunASRModel
+from .kokoro import KokoroModel
 from .melotts import MeloTTSModel
 from .whisper import WhisperModel
 from .whisper_mlx import WhisperMLXModel
@@ -176,6 +177,7 @@ def create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ],
     AudioModelDescription,
 ]:
@@ -192,6 +194,7 @@ create_audio_model_instance(
         F5TTSModel,
         F5TTSMLXModel,
         MeloTTSModel,
+        KokoroModel,
     ]
     if model_spec.model_family == "whisper":
         if not model_spec.engine:
@@ -212,6 +215,8 @@ create_audio_model_instance(
         model = F5TTSMLXModel(model_uid, model_path, model_spec, **kwargs)
     elif model_spec.model_family == "MeloTTS":
         model = MeloTTSModel(model_uid, model_path, model_spec, **kwargs)
+    elif model_spec.model_family == "Kokoro":
+        model = KokoroModel(model_uid, model_path, model_spec, **kwargs)
     else:
         raise Exception(f"Unsupported audio model family: {model_spec.model_family}")
     model_description = AudioModelDescription(
@@ -0,0 +1,139 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class KokoroModel:
+    # The available voices, should keep sync with https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+    VOICES = [
+        "af_alloy",
+        "af_aoede",
+        "af_bella",
+        "af_jessica",
+        "af_kore",
+        "af_nicole",
+        "af_nova",
+        "af_river",
+        "af_sarah",
+        "af_sky",
+        "am_adam",
+        "am_echo",
+        "am_eric",
+        "am_fenrir",
+        "am_liam",
+        "am_michael",
+        "am_onyx",
+        "am_puck",
+        "bf_alice",
+        "bf_emma",
+        "bf_isabella",
+        "bf_lily",
+        "bm_daniel",
+        "bm_fable",
+        "bm_george",
+        "bm_lewis",
+    ]
+
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+
+        from kokoro import KModel, KPipeline
+
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
+        # LANG_CODES = dict(
+        #     a='American English',
+        #     b='British English',
+        # )
+        lang_code = self._kwargs.get("lang_code", "a")
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path),
+            device=self._device,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        assert self._model is not None
+        if not voice:
+            voice = next(iter(self.VOICES))
+            logger.info("Auto select speaker: %s", voice)
+        elif not voice.endswith(".pt") and voice not in self.VOICES:
+            raise ValueError(
+                f"Invalid voice: {voice}, available speakers: {self.VOICES}"
+            )
+        else:
+            logger.info("Using custom voice pt: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        results = list(generator)
+        audio = np.concatenate([r[2] for r in results])
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                24000,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
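A hedged usage sketch for the new Kokoro backend through the Python client; the server address, voice name, and launch arguments are assumptions based on the class above and the registry entries below, not part of this diff:

from xinference.client import Client

client = Client("http://localhost:9997")  # assumed local supervisor address
uid = client.launch_model(model_name="Kokoro-82M", model_type="audio")
model = client.get_model(uid)
# speech() mirrors KokoroModel.speech above and returns encoded audio bytes.
audio = model.speech("Hello from Kokoro", voice="af_bella", response_format="mp3")
with open("hello.mp3", "wb") as f:
    f.write(audio)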
@@ -338,5 +338,13 @@
         "model_ability": "text-to-audio",
         "multilingual": false,
         "language": "KR"
+    },
+    {
+        "model_name": "Kokoro-82M",
+        "model_family": "Kokoro",
+        "model_id": "hexgrad/Kokoro-82M",
+        "model_revision": "7a29fcdf8e997bac6d6f5f6f0c2f0b92912f6102",
+        "model_ability": "text-to-audio",
+        "multilingual": true
     }
 ]
@@ -100,5 +100,14 @@
         "model_revision": "master",
         "model_ability": "text-to-audio",
         "multilingual": true
+    },
+    {
+        "model_name": "Kokoro-82M",
+        "model_family": "Kokoro",
+        "model_hub": "modelscope",
+        "model_id": "AI-ModelScope/Kokoro-82M",
+        "model_revision": "master",
+        "model_ability": "text-to-audio",
+        "multilingual": true
     }
 ]
@@ -28,7 +28,7 @@ from ....types import (
 )
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
@@ -123,18 +123,22 @@ class LlamaCppModel(LLM):
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-        # handle legacy cache.
-        model_path = os.path.realpath(
-            os.path.join(
-                self.model_path,
-                self.model_spec.model_file_name_template.format(
-                    quantization=self.quantization
-                ),
+        if os.path.isfile(self.model_path):
+            # mostly passed from --model_path
+            model_path = os.path.realpath(self.model_path)
+        else:
+            # handle legacy cache.
+            model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
             )
-        )
-        legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-        if os.path.exists(legacy_model_file_path):
-            model_path = legacy_model_file_path
+            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+            if os.path.exists(legacy_model_file_path):
+                model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
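The path handling now distinguishes an explicit GGUF file path from a cache directory. A simplified, hypothetical helper that mirrors the new resolution order:

import os

def resolve_gguf_path(model_path: str, file_name_template: str, quantization: str) -> str:
    # 1. an explicit file path (e.g. passed via --model_path) is used as-is
    if os.path.isfile(model_path):
        return os.path.realpath(model_path)
    # 2. otherwise look for the templated file inside the cache directory
    candidate = os.path.realpath(
        os.path.join(model_path, file_name_template.format(quantization=quantization))
    )
    # 3. a legacy "model.bin" in the directory still takes precedence when present
    legacy = os.path.join(model_path, "model.bin")
    return legacy if os.path.exists(legacy) else candidate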
@@ -272,8 +276,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
-        if tools and model_family in QWEN_TOOL_CALL_FAMILY:
-            full_context_kwargs["tools"] = tools
+        if tools:
+            if model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
+                self._tools_to_messages_for_deepseek(messages, tools)
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs
@@ -7125,6 +7125,91 @@
         "<|endoftext|>"
       ]
     },
+    {
+      "version":1,
+      "context_length":128000,
+      "model_name":"qwen2.5-vl-instruct",
+      "model_lang":[
+        "en",
+        "zh"
+      ],
+      "model_ability":[
+        "chat",
+        "vision"
+      ],
+      "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+      "model_specs":[
+        {
+          "model_format":"pytorch",
+          "model_size_in_billions":3,
+          "quantizations":[
+            "none"
+          ],
+          "model_id":"Qwen/Qwen2.5-VL-3B-Instruct"
+        },
+        {
+          "model_format":"pytorch",
+          "model_size_in_billions":7,
+          "quantizations":[
+            "none"
+          ],
+          "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
+        },
+        {
+          "model_format":"pytorch",
+          "model_size_in_billions":72,
+          "quantizations":[
+            "none"
+          ],
+          "model_id":"Qwen/Qwen2.5-VL-72B-Instruct"
+        },
+        {
+          "model_format":"mlx",
+          "model_size_in_billions":3,
+          "quantizations":[
+            "3bit",
+            "4bit",
+            "6bit",
+            "8bit",
+            "bf16"
+          ],
+          "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+        },
+        {
+          "model_format":"mlx",
+          "model_size_in_billions":7,
+          "quantizations":[
+            "3bit",
+            "4bit",
+            "6bit",
+            "8bit",
+            "bf16"
+          ],
+          "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+        },
+        {
+          "model_format":"mlx",
+          "model_size_in_billions":72,
+          "quantizations":[
+            "3bit",
+            "4bit",
+            "6bit",
+            "8bit",
+            "bf16"
+          ],
+          "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+        }
+      ],
+      "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+      "stop_token_ids": [
+        151645,
+        151643
+      ],
+      "stop": [
+        "<|im_end|>",
+        "<|endoftext|>"
+      ]
+    },
     {
       "version": 1,
       "context_length": 32768,
@@ -7212,7 +7297,7 @@
         "zh"
       ],
       "model_ability":[
-        "chat",
+        "generate",
         "audio"
       ],
       "model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
@@ -8937,6 +9022,151 @@
         "<|end▁of▁sentence|>"
       ]
     },
+    {
+      "version": 1,
+      "context_length": 131072,
+      "model_name": "deepseek-r1-distill-llama",
+      "model_lang": [
+        "en",
+        "zh"
+      ],
+      "model_ability": [
+        "chat"
+      ],
+      "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+      "model_specs": [
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_AWQ"
+        },
+        {
+          "model_format": "gptq",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "jakiAJK/DeepSeek-R1-Distill-Llama-8B_GPTQ-int4"
+        },
+        {
+          "model_format": "ggufv2",
+          "model_size_in_billions": "1_5",
+          "quantizations": [
+            "Q2_K",
+            "Q2_K_L",
+            "Q3_K_M",
+            "Q4_K_M",
+            "Q5_K_M",
+            "Q6_K",
+            "Q8_0",
+            "F16"
+          ],
+          "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+          "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf"
+        },
+        {
+          "model_format": "mlx",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "3bit",
+            "4bit",
+            "6bit",
+            "8bit",
+            "bf16"
+          ],
+          "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-8B-{quantization}"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 70,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 70,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "casperhansen/deepseek-r1-distill-llama-70b-awq"
+        },
+        {
+          "model_format": "gptq",
+          "model_size_in_billions": 70,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "empirischtech/DeepSeek-R1-Distill-Llama-70B-gptq-4bit"
+        },
+        {
+          "model_format": "ggufv2",
+          "model_size_in_billions": 70,
+          "quantizations": [
+            "Q2_K",
+            "Q2_K_L",
+            "Q3_K_M",
+            "Q4_K_M",
+            "Q5_K_M",
+            "Q6_K",
+            "Q8_0",
+            "F16"
+          ],
+          "quantization_parts": {
+            "Q6_K": [
+              "00001-of-00002",
+              "00002-of-00002"
+            ],
+            "Q8_0": [
+              "00001-of-00002",
+              "00002-of-00002"
+            ],
+            "F16": [
+              "00001-of-00003",
+              "00002-of-00003",
+              "00003-of-00003"
+            ]
+          },
+          "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+          "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+          "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf"
+        },
+        {
+          "model_format": "mlx",
+          "model_size_in_billions": 70,
+          "quantizations": [
+            "3bit",
+            "4bit",
+            "6bit",
+            "8bit"
+          ],
+          "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
+        }
+      ],
+      "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+      "stop_token_ids": [
+        151643
+      ],
+      "stop": [
+        "<|end▁of▁sentence|>"
+      ]
+    },
     {
       "version": 1,
       "context_length": 8192,
@@ -9306,5 +9536,80 @@
         "<|user|>",
         "<|observation|>"
       ]
+    },
+    {
+      "version": 1,
+      "context_length": 32768,
+      "model_name": "internlm3-instruct",
+      "model_lang": [
+        "en",
+        "zh"
+      ],
+      "model_ability": [
+        "chat",
+        "tools"
+      ],
+      "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+      "model_specs": [
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "4-bit",
+            "8-bit",
+            "none"
+          ],
+          "model_id": "internlm/internlm3-8b-instruct"
+        },
+        {
+          "model_format": "gptq",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "internlm/internlm3-8b-instruct-gptq-int4"
+        },
+        {
+          "model_format": "awq",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "Int4"
+          ],
+          "model_id": "internlm/internlm3-8b-instruct-awq"
+        },
+        {
+          "model_format": "ggufv2",
+          "model_size_in_billions": 8,
+          "quantizations": [
+            "q2_k",
+            "q3_k_m",
+            "q4_0",
+            "q4_k_m",
+            "q5_0",
+            "q5_k_m",
+            "q6_k",
+            "q8_0"
+          ],
+          "model_id": "internlm/internlm3-8b-instruct-gguf",
+          "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
+        },
+        {
+          "model_format":"mlx",
+          "model_size_in_billions":8,
+          "quantizations":[
+            "4bit"
+          ],
+          "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+        }
+      ],
+      "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+      "stop_token_ids": [
+        2,
+        128131
+      ],
+      "stop": [
+        "</s>",
+        "<|im_end|>"
+      ]
     }
 ]
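With these registry entries in place, the new models (qwen2.5-vl-instruct, deepseek-r1-distill-llama, internlm3-instruct) can be launched like any other built-in LLM. A sketch with the Python client; the server address, engine, and size/quantization choices are assumptions, not part of the diff:

from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(
    model_name="qwen2.5-vl-instruct",
    model_engine="transformers",  # assumed engine; MLX specs are also registered above
    model_format="pytorch",
    model_size_in_billions=7,
    quantization="none",
)
model = client.get_model(uid)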