xinference 1.6.0.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as possibly problematic.

Files changed (124)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +79 -2
  3. xinference/client/restful/restful_client.py +65 -3
  4. xinference/conftest.py +0 -7
  5. xinference/core/media_interface.py +132 -8
  6. xinference/core/model.py +44 -6
  7. xinference/core/scheduler.py +1 -10
  8. xinference/core/supervisor.py +8 -17
  9. xinference/core/worker.py +5 -27
  10. xinference/deploy/cmdline.py +6 -2
  11. xinference/model/audio/chattts.py +24 -39
  12. xinference/model/audio/cosyvoice.py +18 -30
  13. xinference/model/audio/funasr.py +42 -0
  14. xinference/model/audio/model_spec.json +71 -1
  15. xinference/model/audio/model_spec_modelscope.json +76 -2
  16. xinference/model/audio/utils.py +75 -0
  17. xinference/model/core.py +1 -0
  18. xinference/model/embedding/__init__.py +74 -18
  19. xinference/model/embedding/core.py +98 -589
  20. xinference/model/embedding/embed_family.py +133 -0
  21. xinference/{thirdparty/omnilmm/train → model/embedding/flag}/__init__.py +1 -1
  22. xinference/model/embedding/flag/core.py +282 -0
  23. xinference/model/embedding/model_spec.json +24 -0
  24. xinference/model/embedding/model_spec_modelscope.json +24 -0
  25. xinference/model/embedding/sentence_transformers/__init__.py +13 -0
  26. xinference/model/embedding/sentence_transformers/core.py +399 -0
  27. xinference/model/embedding/vllm/core.py +95 -0
  28. xinference/model/image/model_spec.json +30 -3
  29. xinference/model/image/model_spec_modelscope.json +41 -2
  30. xinference/model/image/stable_diffusion/core.py +144 -53
  31. xinference/model/llm/__init__.py +6 -54
  32. xinference/model/llm/core.py +19 -5
  33. xinference/model/llm/llama_cpp/core.py +59 -3
  34. xinference/model/llm/llama_cpp/memory.py +457 -0
  35. xinference/model/llm/llm_family.json +247 -402
  36. xinference/model/llm/llm_family.py +88 -16
  37. xinference/model/llm/llm_family_modelscope.json +260 -421
  38. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  39. xinference/model/llm/sglang/core.py +8 -0
  40. xinference/model/llm/transformers/__init__.py +27 -6
  41. xinference/model/llm/transformers/chatglm.py +4 -2
  42. xinference/model/llm/transformers/core.py +49 -28
  43. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  44. xinference/model/llm/transformers/gemma3.py +119 -164
  45. xinference/model/llm/transformers/multimodal/__init__.py +13 -0
  46. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  47. xinference/model/llm/transformers/multimodal/core.py +205 -0
  48. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  49. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  50. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  51. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  52. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  53. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  54. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  55. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  56. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  57. xinference/model/llm/transformers/opt.py +4 -2
  58. xinference/model/llm/transformers/utils.py +6 -37
  59. xinference/model/llm/utils.py +11 -0
  60. xinference/model/llm/vllm/core.py +7 -0
  61. xinference/model/rerank/core.py +91 -3
  62. xinference/model/rerank/model_spec.json +24 -0
  63. xinference/model/rerank/model_spec_modelscope.json +24 -0
  64. xinference/model/rerank/utils.py +20 -2
  65. xinference/model/utils.py +38 -1
  66. xinference/model/video/diffusers.py +65 -3
  67. xinference/model/video/model_spec.json +31 -4
  68. xinference/model/video/model_spec_modelscope.json +32 -4
  69. xinference/web/ui/build/asset-manifest.json +6 -6
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/css/main.013f296b.css +2 -0
  72. xinference/web/ui/build/static/css/main.013f296b.css.map +1 -0
  73. xinference/web/ui/build/static/js/main.8a9e3ba0.js +3 -0
  74. xinference/web/ui/build/static/js/main.8a9e3ba0.js.map +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/34cfbfb7836e136ba3261cfd411cc554bf99ba24b35dcceebeaa4f008cb3c9dc.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/55b9fb40b57fa926e8f05f31c2f96467e76e5ad62f033dca97c03f9e8c4eb4fe.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6595880facebca7ceace6f17cf21c3a5a9219a2f52fb0ba9f3cf1131eddbcf6b.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/aa998bc2d9c11853add6b8a2e08f50327f56d8824ccaaec92d6dde1b305f0d85.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/c748246b1d7bcebc16153be69f37e955bb2145526c47dd425aeeff70d3004dbc.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/e31234e95d60a5a7883fbcd70de2475dc1c88c90705df1a530abb68f86f80a51.json +1 -0
  82. xinference/web/ui/src/locales/en.json +21 -8
  83. xinference/web/ui/src/locales/ja.json +224 -0
  84. xinference/web/ui/src/locales/ko.json +224 -0
  85. xinference/web/ui/src/locales/zh.json +21 -8
  86. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/METADATA +14 -11
  87. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/RECORD +93 -100
  88. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/WHEEL +1 -1
  89. xinference/model/llm/transformers/cogvlm2.py +0 -442
  90. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  91. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  92. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  93. xinference/model/llm/transformers/intern_vl.py +0 -526
  94. xinference/model/llm/transformers/internlm2.py +0 -94
  95. xinference/model/llm/transformers/minicpmv25.py +0 -193
  96. xinference/model/llm/transformers/omnilmm.py +0 -132
  97. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  98. xinference/model/llm/transformers/qwen_vl.py +0 -360
  99. xinference/thirdparty/omnilmm/LICENSE +0 -201
  100. xinference/thirdparty/omnilmm/chat.py +0 -218
  101. xinference/thirdparty/omnilmm/constants.py +0 -4
  102. xinference/thirdparty/omnilmm/conversation.py +0 -332
  103. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  104. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  105. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  106. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  107. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  108. xinference/thirdparty/omnilmm/utils.py +0 -134
  109. xinference/web/ui/build/static/css/main.337afe76.css +0 -2
  110. xinference/web/ui/build/static/css/main.337afe76.css.map +0 -1
  111. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  112. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  117. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  118. xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +0 -1
  119. xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +0 -1
  120. /xinference/{thirdparty/omnilmm → model/embedding/vllm}/__init__.py +0 -0
  121. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.8a9e3ba0.js.LICENSE.txt} +0 -0
  122. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/entry_points.txt +0 -0
  123. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/licenses/LICENSE +0 -0
  124. {xinference-1.6.0.post1.dist-info → xinference-1.7.0.dist-info}/top_level.txt +0 -0
The hunks below are from xinference/model/llm/llm_family_modelscope.json (every affected entry declares "model_hub": "modelscope"; the file shows +260 -421 in the list above).

@@ -2856,47 +2856,6 @@
  "<|im_end|>"
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "deepseek-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "1_3",
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
- "model_hub": "modelscope"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/deepseek-vl-7b-chat",
- "model_hub": "modelscope"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 100001
- ],
- "stop": [
- "<|end▁of▁sentence|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -3111,53 +3070,6 @@
  }
  ]
  },
- {
- "version": 1,
- "context_length": 4096,
- "model_name": "qwen-vl-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 7,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "Qwen/Qwen-VL-Chat",
- "model_revision": "master"
- },
- {
- "model_format": "gptq",
- "model_size_in_billions": 7,
- "quantizations": [
- "Int4"
- ],
- "model_hub": "modelscope",
- "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
- "model_revision": "master"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 151643,
- 151644,
- 151645
- ],
- "stop": [
- "<|endoftext|>",
- "<|im_start|>",
- "<|im_end|>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -3330,6 +3242,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-4b-it-f16.gguf",
+ "mmproj-google_gemma-3-4b-it-f32.gguf",
+ "mmproj-google_gemma-3-4b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-4b-it-GGUF",
  "model_file_name_template": "google_gemma-3-4b-it-{quantization}.gguf",
  "model_hub": "modelscope"
@@ -3353,6 +3270,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-12b-it-f16.gguf",
+ "mmproj-google_gemma-3-12b-it-f32.gguf",
+ "mmproj-google_gemma-3-12b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-12b-it-GGUF",
  "model_file_name_template": "google_gemma-3-12b-it-{quantization}.gguf",
  "model_hub": "modelscope"
@@ -3376,6 +3298,11 @@
  "Q8_0",
  "bf16"
  ],
+ "multimodal_projectors": [
+ "mmproj-google_gemma-3-27b-it-f16.gguf",
+ "mmproj-google_gemma-3-27b-it-f32.gguf",
+ "mmproj-google_gemma-3-27b-it-bf16.gguf"
+ ],
  "model_id": "bartowski/google_gemma-3-27b-it-GGUF",
  "model_file_name_template": "google_gemma-3-27b-it-{quantization}.gguf",
  "model_hub": "modelscope"
@@ -3429,49 +3356,6 @@
  "<start_of_turn>"
  ]
  },
- {
- "version": 1,
- "context_length": 2048,
- "model_name": "OmniLMM",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 3,
- "quantizations": [
- "none"
- ],
- "model_id": "OpenBMB/MiniCPM-V",
- "model_hub": "modelscope",
- "model_revision": "master"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_id": "OpenBMB/OmniLMM-12B",
- "model_hub": "modelscope",
- "model_revision": "master"
- }
- ],
- "chat_template": "",
- "stop_token_ids": [
- 2
- ],
- "stop": [
- "</s>"
- ]
- },
  {
  "version": 1,
  "context_length": 4096,
@@ -3637,49 +3521,6 @@
  "</s>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "MiniCPM-Llama3-V-2_5",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "OpenBMB/MiniCPM-Llama3-V-2_5",
- "model_revision": "master"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 8,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "OpenBMB/MiniCPM-Llama3-V-2_5-{quantization}",
- "model_revision": "master"
- }
- ],
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
- "stop_token_ids": [
- 128001
- ],
- "stop": [
- "<|end_of_text|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,
@@ -3951,86 +3792,6 @@
  "<|im_end|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B",
- "model_revision": "master"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/cogvlm2-llama3-chinese-chat-19B-{quantization}",
- "model_revision": "master"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "cogvlm2-video-llama3-chat",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "CogVLM2-Video achieves state-of-the-art performance on multiple video question answering tasks.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 12,
- "quantizations": [
- "none"
- ],
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/cogvlm2-video-llama3-chat",
- "model_revision": "master"
- }
- ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = '<|begin_of_text|>' + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ '<|end_of_text|>' }}{% endif %}",
- "stop_token_ids": [
- 128001,
- 128009
- ],
- "stop": [
- "<|end_of_text|>",
- "<|eot_id|>"
- ]
- },
  {
  "version": 1,
  "context_length": 8192,
@@ -4516,6 +4277,56 @@
  "</s>"
  ]
  },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "minicpm4",
+ "model_lang": [
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieves this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": "0_5",
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "OpenBMB/MiniCPM4-0.5B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "OpenBMB/MiniCPM4-8B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/MiniCPM4-8B-4bit",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 2,
+ 73440
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -4584,41 +4395,6 @@
  ]
  }
  },
- {
- "version": 1,
- "context_length": 128000,
- "model_name": "deepseek-v2",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat"
- ],
- "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": 16,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2-Lite",
- "model_hub": "modelscope",
- "model_revision": "master"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": 236,
- "quantizations": [
- "none"
- ],
- "model_id": "deepseek-ai/DeepSeek-V2",
- "model_hub": "modelscope",
- "model_revision": "master"
- }
- ]
- },
  {
  "version": 1,
  "context_length": 128000,
@@ -4856,17 +4632,57 @@
  }
  },
  {
- "model_format": "mlx",
+ "model_format": "mlx",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "3bit",
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-V3-{quantization}",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-v3-0324",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3-0324",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "awq",
  "model_size_in_billions": 671,
  "quantizations": [
- "3bit",
- "4bit"
+ "Int4"
  ],
- "model_id": "mlx-community/DeepSeek-V3-{quantization}",
+ "model_id": "cognitivecomputations/DeepSeek-V3-0324-AWQ",
  "model_hub": "modelscope"
  }
  ],
- "chat_template": "{% if messages %} {% if system or tools %} {% if system %} {{ system }} {% endif %} {% if tools %} {# Handle tools here if needed #} {% endif %} {% endif %} {% for message in messages %} {% set last = loop.index == loop.length %} {% if message.role == \"user\" %} <|User|> {% if tools and last %} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}. Do not use variables. {{ tools }} {% endif %} {{ message.content }} {% if last %} <|Assistant|> {% endif %} {% elif message.role == \"assistant\" %} <|Assistant|> {% if message.tool_calls %} <|tool▁calls▁begin|> {% for tool in message.tool_calls %} <|tool▁call▁begin|> {\"name\": \"{{ tool.function.name }}\", \"parameters\": {{ tool.function.arguments }}} <|tool▁call▁end|> {% endfor %} <|tool▁calls▁end|> {% else %} {{ message.content }} {% if not last %} <|end▁of▁sentence|> {% endif %} {% endif %} {% elif message.role == \"tool\" %} <|tool▁outputs▁begin|> <|tool▁output▁begin|> {{ message.content }} <|tool▁output▁end|> <|tool▁outputs▁end|> {% if last and message.role != \"assistant\" %} <|Assistant|> {% endif %} {% endif %} {% endfor %} {% else %} {% if system %} {{ system }} {% endif %} {% if prompt %} <|User|> {{ prompt }} {% endif %} <|Assistant|> {{ response }} {% if response %} {{ response }} {% endif %} {% endif %}",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
  "stop_token_ids": [
  1
  ],
@@ -5095,6 +4911,156 @@
  "reasoning_start_tag": "<think>",
  "reasoning_end_tag": "</think>"
  },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-r1-0528",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528. In the latest update, DeepSeek R1 has significantly improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. The model has demonstrated outstanding performance across various benchmark evaluations, including mathematics, programming, and general logic. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "Int4-Int8Mix-Lite",
+ "Int4-Int8Mix-Compact",
+ "Int4-Int8Mix-Medium"
+ ],
+ "model_id": "tclf90/DeepSeek-R1-0528-GPTQ-{quantization}",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-0528-qwen3",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528. In the latest update, DeepSeek R1 has significantly improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. The model has demonstrated outstanding performance across various benchmark evaluations, including mathematics, programming, and general logic. Its overall performance is now approaching that of leading models, such as O3 and Gemini 2.5 Pro",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-W4A16",
+ "Int8-W8A16"
+ ],
+ "model_id": "okwinds/DeepSeek-R1-0528-Qwen3-8B-{quantization}",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4-Int8Mix"
+ ],
+ "model_id": "tclf90/DeepSeek-R1-0528-Qwen3-8B-GPTQ-Int4-Int8Mix",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 151645
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 163840,
+ "model_name": "deepseek-prover-v2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "We introduce DeepSeek-Prover-V2, an open-source large language model designed for formal theorem proving in Lean 4, with initialization data collected through a recursive theorem proving pipeline powered by DeepSeek-V3. The cold-start training procedure begins by prompting DeepSeek-V3 to decompose complex problems into a series of subgoals. The proofs of resolved subgoals are synthesized into a chain-of-thought process, combined with DeepSeek-V3's step-by-step reasoning, to create an initial cold start for reinforcement learning. This process enables us to integrate both informal and formal mathematical reasoning into a unified model",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-671B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-Prover-V2-7B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-Prover-V2-7B-4bit",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
  {
  "version": 1,
  "context_length": 32768,
@@ -7004,133 +6970,6 @@
  "<|observation|>"
  ]
  },
- {
- "version": 1,
- "context_length": 8192,
- "model_name": "glm-edge-v",
- "model_lang": [
- "en",
- "zh"
- ],
- "model_ability": [
- "chat",
- "vision"
- ],
- "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
- "model_specs": [
- {
- "model_format": "pytorch",
- "model_size_in_billions": "2",
- "quantizations": [
- "none"
- ],
- "model_id": "ZhipuAI/glm-edge-v-2b",
- "model_hub": "modelscope"
- },
- {
- "model_format": "pytorch",
- "model_size_in_billions": "5",
- "quantizations": [
- "none"
- ],
- "model_id": "ZhipuAI/glm-edge-v-5b",
- "model_hub": "modelscope"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "2",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "Q4_0",
- "Q4_1",
- "Q4_K",
- "Q4_K_M",
- "Q4_K_S",
- "Q5_0",
- "Q5_1",
- "Q5_K",
- "Q5_K_M",
- "Q5_K_S",
- "Q6_K",
- "Q8_0"
- ],
- "model_file_name_template": "ggml-model-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "F16"
- ],
- "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
- },
- {
- "model_format": "ggufv2",
- "model_size_in_billions": "5",
- "quantizations": [
- "f16"
- ],
- "model_file_name_template": "mmproj-model-{quantization}.gguf",
- "model_hub": "modelscope",
- "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
- }
- ],
- "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
- "stop_token_ids": [
- 59246,
- 59253,
- 59255
- ],
- "stop": [
- "<|endoftext|>",
- "<|user|>",
- "<|observation|>"
- ]
- },
  {
  "version": 1,
  "context_length": 32768,