xinference 1.3.1.post1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +1 -1
  3. xinference/model/llm/__init__.py +3 -0
  4. xinference/model/llm/llama_cpp/core.py +44 -14
  5. xinference/model/llm/llm_family.json +271 -12
  6. xinference/model/llm/llm_family_modelscope.json +248 -13
  7. xinference/model/llm/mlx/core.py +15 -11
  8. xinference/model/llm/reasoning_parser.py +14 -6
  9. xinference/model/llm/sglang/core.py +2 -0
  10. xinference/model/llm/transformers/core.py +3 -2
  11. xinference/model/llm/transformers/gemma3.py +185 -0
  12. xinference/model/llm/transformers/intern_vl.py +0 -2
  13. xinference/model/llm/utils.py +37 -29
  14. xinference/model/llm/vllm/core.py +8 -3
  15. xinference/types.py +2 -2
  16. xinference/web/ui/build/asset-manifest.json +6 -6
  17. xinference/web/ui/build/index.html +1 -1
  18. xinference/web/ui/build/static/css/main.b494ae7e.css +2 -0
  19. xinference/web/ui/build/static/css/main.b494ae7e.css.map +1 -0
  20. xinference/web/ui/build/static/js/main.3cea968e.js +3 -0
  21. xinference/web/ui/build/static/js/main.3cea968e.js.map +1 -0
  22. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/cc97b49285d7717c63374766c789141a4329a04582ab32756d7e0e614d4c5c7f.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +1 -0
  26. xinference/web/ui/src/locales/en.json +2 -2
  27. xinference/web/ui/src/locales/zh.json +1 -1
  28. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/METADATA +1 -1
  29. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/RECORD +34 -33
  30. xinference/web/ui/build/static/css/main.f8177338.css +0 -2
  31. xinference/web/ui/build/static/css/main.f8177338.css.map +0 -1
  32. xinference/web/ui/build/static/js/main.55b70cb7.js +0 -3
  33. xinference/web/ui/build/static/js/main.55b70cb7.js.map +0 -1
  34. xinference/web/ui/node_modules/.cache/babel-loader/2deac8d5636974533e3714f34e94fc754f9153a07c6ee11e72846cb8eae47e4b.json +0 -1
  35. xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +0 -1
  36. xinference/web/ui/node_modules/.cache/babel-loader/e23d476fcbf6fd69c8986bf82133d257d28aa8fc9a5cab231d81c1c75c58cd99.json +0 -1
  37. xinference/web/ui/node_modules/.cache/babel-loader/e7a8c37fda8725cab69c7ef8c627060bd7fc806adc67e00fe628ba148cb86d7f.json +0 -1
  38. /xinference/web/ui/build/static/js/{main.55b70cb7.js.LICENSE.txt → main.3cea968e.js.LICENSE.txt} +0 -0
  39. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/LICENSE +0 -0
  40. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/WHEEL +0 -0
  41. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/entry_points.txt +0 -0
  42. {xinference-1.3.1.post1.dist-info → xinference-1.4.0.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
  version_json = '''
  {
- "date": "2025-03-11T12:00:36+0800",
+ "date": "2025-03-21T14:33:52+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "2ef99fbb5450a76a6ba07a909f58b8c2e4c22a28",
- "version": "1.3.1.post1"
+ "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
+ "version": "1.4.0"
  }
  ''' # END VERSION_JSON
 
xinference/core/chat_interface.py CHANGED
@@ -137,7 +137,7 @@ class GradioInterface:
  ):
  assert isinstance(chunk, dict)
  delta = chunk["choices"][0]["delta"]
- if "content" not in delta:
+ if "content" not in delta or delta["content"] is None:
  continue
  else:
  # some model like deepseek-r1-distill-qwen
xinference/model/llm/__init__.py CHANGED
@@ -143,6 +143,7 @@ def _install():
  DeepSeekV2PytorchModel,
  )
  from .transformers.deepseek_vl import DeepSeekVLChatModel
+ from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
  from .transformers.glm4v import Glm4VModel
  from .transformers.glm_edge_v import GlmEdgeVModel
  from .transformers.intern_vl import InternVLChatModel
@@ -198,6 +199,8 @@ def _install():
  OptPytorchModel,
  GlmEdgeVModel,
  CogAgentChatModel,
+ Gemma3TextChatModel,
+ Gemma3ChatModel,
  ]
  )
  if OmniLMMModel: # type: ignore
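These registrations hook up the new transformers/gemma3.py backend (185 added lines, not shown in this diff) in both a text-only and a vision variant. As a rough idea of what the text path wraps, here is a hedged sketch using plain Hugging Face transformers; the model id and generation settings are illustrative, and transformers >= 4.50 is assumed for Gemma 3 support:

# Hedged sketch, not the Gemma3TextChatModel implementation: load the 1B
# instruction-tuned checkpoint with vanilla transformers and run one chat turn.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-3-1b-it"  # text-only Gemma 3 variant
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

messages = [{"role": "user", "content": "Give me one fun fact about llamas."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))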
xinference/model/llm/llama_cpp/core.py CHANGED
@@ -39,10 +39,15 @@ logger = logging.getLogger(__name__)
  USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 0)))
 
 
- class _Sentinel:
+ class _Done:
  pass
 
 
+ class _Error:
+ def __init__(self, msg):
+ self.msg = msg
+
+
  class XllamaCppModel(LLM, ChatModelMixin):
  def __init__(
  self,
@@ -200,7 +205,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
  )
  prompt_json = orjson.dumps(data)
 
- def _res_callback(ok):
+ def _error_callback(err):
+ try:
+ msg = orjson.loads(err)
+ q.put(_Error(msg))
+ except Exception as e:
+ q.put(_Error(str(e)))
+
+ def _ok_callback(ok):
  try:
  res = orjson.loads(ok)
  res["model"] = self.model_uid
@@ -209,10 +221,10 @@
  logger.exception("handle_completions callback failed: %s", e)
 
  try:
- self._llm.handle_completions(prompt_json, _res_callback, _res_callback)
+ self._llm.handle_completions(prompt_json, _error_callback, _ok_callback)
  except Exception as ex:
  logger.exception("handle_completions failed: %s", ex)
- q.put(_Sentinel)
+ q.put(_Done)
 
  assert self._executor
  self._executor.submit(_handle_completion)
@@ -220,12 +232,17 @@ class XllamaCppModel(LLM, ChatModelMixin):
  if stream:
 
  def _to_iterator():
- while (r := q.get()) is not _Sentinel:
+ while (r := q.get()) is not _Done:
+ if type(r) is _Error:
+ raise Exception("Got error in generate stream: %s", r.msg)
  yield r
 
  return _to_iterator()
  else:
- return q.get()
+ r = q.get()
+ if type(r) is _Error:
+ raise Exception("Got error in generate: %s", r.msg)
+ return r
 
  def chat(
  self,
@@ -253,7 +270,14 @@ class XllamaCppModel(LLM, ChatModelMixin):
  )
  prompt_json = orjson.dumps(data)
 
- def _res_callback(ok):
+ def _error_callback(err):
+ try:
+ msg = orjson.loads(err)
+ q.put(_Error(msg))
+ except Exception as e:
+ q.put(_Error(str(e)))
+
+ def _ok_callback(ok):
  try:
  res = orjson.loads(ok)
  res["model"] = self.model_uid
@@ -263,11 +287,11 @@
 
  try:
  self._llm.handle_chat_completions(
- prompt_json, _res_callback, _res_callback
+ prompt_json, _error_callback, _ok_callback
  )
  except Exception as ex:
  logger.exception("handle_chat_completions failed: %s", ex)
- q.put(_Sentinel)
+ q.put(_Done)
 
  assert self._executor
  self._executor.submit(_handle_chat_completion)
@@ -275,14 +299,19 @@ class XllamaCppModel(LLM, ChatModelMixin):
  if stream:
 
  def _to_iterator():
- while (r := q.get()) is not _Sentinel:
+ while (r := q.get()) is not _Done:
+ if type(r) is _Error:
+ raise Exception("Got error in chat stream: %s", r.msg)
  yield r
 
  return self._to_chat_completion_chunks(
  _to_iterator(), self.reasoning_parser
  )
  else:
- return self._to_chat_completion(q.get(), self.reasoning_parser)
+ r = q.get()
+ if type(r) is _Error:
+ raise Exception("Got error in chat: %s", r.msg)
+ return self._to_chat_completion(r, self.reasoning_parser)
 
 
  class LlamaCppModel(LLM):
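The net effect of these hunks is that xllamacpp errors now propagate to the caller instead of silently ending the stream: the single sentinel is split into a _Done terminator and an _Error value that the consumer side re-raises. A simplified, self-contained sketch of the same callback-to-iterator bridge (not the xinference code itself):

# Sketch of the queue/sentinel pattern used above: a worker thread pushes parsed
# results onto a queue, _Done terminates the stream, and _Error re-raises on the
# consumer side instead of being swallowed.
import queue
from concurrent.futures import ThreadPoolExecutor

class _Done:
    pass

class _Error:
    def __init__(self, msg):
        self.msg = msg

def stream_results(produce):
    q: queue.Queue = queue.Queue()

    def _worker():
        try:
            for item in produce():
                q.put(item)
        except Exception as e:
            q.put(_Error(str(e)))
        finally:
            q.put(_Done)

    ThreadPoolExecutor(max_workers=1).submit(_worker)

    def _iterator():
        while (r := q.get()) is not _Done:
            if type(r) is _Error:
                raise Exception("Got error in generate stream: %s" % r.msg)
            yield r

    return _iterator()

print(list(stream_results(lambda: iter(["Hello", " world"]))))  # ['Hello', ' world']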
@@ -533,10 +562,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
  tools = generate_config.pop("tools", []) if generate_config else None
  full_context_kwargs = {}
  if tools:
- if model_family in QWEN_TOOL_CALL_FAMILY:
+ if (
+ model_family in QWEN_TOOL_CALL_FAMILY
+ or model_family in DEEPSEEK_TOOL_CALL_FAMILY
+ ):
  full_context_kwargs["tools"] = tools
- elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
- self._tools_to_messages_for_deepseek(messages, tools)
  assert self.model_family.chat_template is not None
  full_prompt = self.get_full_context(
  messages, self.model_family.chat_template, **full_context_kwargs
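With this change, llama.cpp chat no longer rewrites DeepSeek tool specs into synthetic messages; both Qwen- and DeepSeek-family models hand the OpenAI-style tools list straight to the chat template. A rough sketch of the resulting control flow (the family memberships below are illustrative, not the package's actual constants):

# Illustrative only: both tool-call families now route through the same branch.
QWEN_TOOL_CALL_FAMILY = {"qwen2.5-instruct"}    # illustrative membership
DEEPSEEK_TOOL_CALL_FAMILY = {"deepseek-v3"}     # illustrative membership

def build_context_kwargs(model_family, tools):
    full_context_kwargs = {}
    if tools and (
        model_family in QWEN_TOOL_CALL_FAMILY
        or model_family in DEEPSEEK_TOOL_CALL_FAMILY
    ):
        # the tools list is passed directly to the Jinja chat template
        full_context_kwargs["tools"] = tools
    return full_context_kwargs

print(build_context_kwargs("deepseek-v3", [{"type": "function", "function": {"name": "get_weather"}}]))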
xinference/model/llm/llm_family.json CHANGED
@@ -5786,6 +5786,265 @@
  "<start_of_turn>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "gemma-3-1b-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-1b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-1b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-1b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-1b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "gemma-3-it",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-4b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-12b-it"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none",
+ "4-bit",
+ "8-bit"
+ ],
+ "model_id": "google/gemma-3-27b-it"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "IQ3_XS",
+ "IQ3_XXS",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "bf16"
+ ],
+ "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
+ "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-4b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-12b-it-{quantization}"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "4bit",
+ "6bit",
+ "8bit",
+ "fp16"
+ ],
+ "model_id": "mlx-community/gemma-3-27b-it-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
+ "stop_token_ids": [
+ 1,
+ 105,
+ 106
+ ],
+ "stop": [
+ "<eos>",
+ "<end_of_turn>",
+ "<start_of_turn>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -6923,7 +7182,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-1B"
+ "model_id": "OpenGVLab/InternVL2_5-1B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6933,7 +7192,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-2B"
+ "model_id": "OpenGVLab/InternVL2_5-2B-MPO"
  },
  {
  "model_format": "pytorch",
@@ -6943,7 +7202,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-4B"
+ "model_id": "OpenGVLab/InternVL2_5-4B-MPO"
  },
  {
  "model_format": "awq",
@@ -6961,7 +7220,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO"
  },
  {
  "model_format": "awq",
@@ -6969,7 +7228,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-8B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-8B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6979,7 +7238,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO"
  },
  {
  "model_format": "awq",
@@ -6987,7 +7246,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-26B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-26B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -6997,7 +7256,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO"
  },
  {
  "model_format": "awq",
@@ -7005,7 +7264,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-38B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-38B-MPO-AWQ"
  },
  {
  "model_format": "pytorch",
@@ -7015,7 +7274,7 @@
  "8-bit",
  "none"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO"
  },
  {
  "model_format": "awq",
@@ -7023,7 +7282,7 @@
  "quantizations": [
  "Int4"
  ],
- "model_id": "OpenGVLab/InternVL2_5-MPO-78B-AWQ"
+ "model_id": "OpenGVLab/InternVL2_5-78B-MPO-AWQ"
  }
  ],
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -7892,7 +8151,7 @@
  "model_id": "mlx-community/DeepSeek-V3-{quantization}"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
  "stop_token_ids": [
  1
  ],
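Both the Gemma 3 and DeepSeek-V3 entries ship their prompting logic as Jinja chat_template strings inside llm_family.json. A hedged sketch of how such a template can be exercised outside the server, using generic Jinja2 rendering under the usual Hugging Face template conventions (this is not xinference's internal helper):

# Minimal sketch: render a chat_template string from llm_family.json with Jinja2.
# Variable names follow the common HF convention (messages, bos_token,
# add_generation_prompt); raise_exception is provided because Gemma's template calls it.
from jinja2 import Environment

def _raise_exception(message):
    raise ValueError(message)

def render_chat_template(chat_template: str, messages, bos_token: str = "<bos>") -> str:
    env = Environment(trim_blocks=True, lstrip_blocks=True)
    env.globals["raise_exception"] = _raise_exception
    return env.from_string(chat_template).render(
        messages=messages, bos_token=bos_token, add_generation_prompt=True
    )

# Example with a simplified Gemma-style turn structure (illustrative template, not the one above):
gemma_like = (
    "{{ bos_token }}{% for m in messages %}<start_of_turn>{{ 'model' if m['role'] == 'assistant' "
    "else m['role'] }}\n{{ m['content'] }}<end_of_turn>\n{% endfor %}"
    "{% if add_generation_prompt %}<start_of_turn>model\n{% endif %}"
)
print(render_chat_template(gemma_like, [{"role": "user", "content": "Hi"}]))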