xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic.

Files changed (80):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -7
  3. xinference/client/handlers.py +3 -0
  4. xinference/client/restful/restful_client.py +9 -1
  5. xinference/core/model.py +19 -0
  6. xinference/core/resource.py +7 -1
  7. xinference/core/scheduler.py +4 -7
  8. xinference/core/status_guard.py +1 -0
  9. xinference/core/supervisor.py +228 -19
  10. xinference/core/utils.py +1 -29
  11. xinference/core/worker.py +28 -2
  12. xinference/deploy/cmdline.py +33 -3
  13. xinference/deploy/local.py +2 -1
  14. xinference/deploy/test/test_cmdline.py +32 -0
  15. xinference/device_utils.py +43 -1
  16. xinference/model/audio/core.py +5 -0
  17. xinference/model/audio/kokoro.py +122 -0
  18. xinference/model/audio/model_spec.json +8 -0
  19. xinference/model/audio/model_spec_modelscope.json +9 -0
  20. xinference/model/image/stable_diffusion/core.py +15 -6
  21. xinference/model/llm/llama_cpp/core.py +21 -14
  22. xinference/model/llm/llm_family.json +866 -46
  23. xinference/model/llm/llm_family.py +7 -2
  24. xinference/model/llm/llm_family_modelscope.json +873 -16
  25. xinference/model/llm/mlx/core.py +11 -3
  26. xinference/model/llm/reasoning_parsers/__init__.py +13 -0
  27. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
  28. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
  29. xinference/model/llm/sglang/core.py +99 -11
  30. xinference/model/llm/transformers/core.py +9 -1
  31. xinference/model/llm/transformers/intern_vl.py +23 -14
  32. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  33. xinference/model/llm/transformers/qwen2_vl.py +20 -3
  34. xinference/model/llm/transformers/utils.py +22 -11
  35. xinference/model/llm/utils.py +164 -20
  36. xinference/model/llm/vllm/core.py +36 -4
  37. xinference/model/llm/vllm/xavier/executor.py +2 -2
  38. xinference/model/llm/vllm/xavier/scheduler.py +3 -3
  39. xinference/thirdparty/internvl/conversation.py +26 -17
  40. xinference/types.py +2 -0
  41. xinference/web/ui/build/asset-manifest.json +6 -6
  42. xinference/web/ui/build/index.html +1 -1
  43. xinference/web/ui/build/static/css/main.f8177338.css +2 -0
  44. xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
  45. xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
  46. xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
  59. xinference/web/ui/src/locales/en.json +14 -1
  60. xinference/web/ui/src/locales/zh.json +14 -1
  61. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
  62. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
  63. xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
  64. xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
  65. xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
  66. xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
  71. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
  72. xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
  73. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
  74. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
  76. /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
  77. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
  78. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
  79. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
  80. {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
@@ -4497,6 +4497,179 @@
  "stop_token_ids": [],
  "stop": []
  },
+ {
+ "version": 1,
+ "context_length": 16384,
+ "model_name": "InternVL2.5",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-1B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-1B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-2B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 2,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-2B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-4B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 4,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-4B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-8B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-8B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 26,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-26B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 26,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-26B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 38,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-38B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 38,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-38B-AWQ",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 78,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-78B",
+ "model_revision": "master"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 78,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "OpenGVLab/InternVL2_5-78B-AWQ",
+ "model_revision": "master"
+ }
+ ],
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [],
+ "stop": []
+ },
  {
  "version": 1,
  "context_length": 8192,
@@ -4769,10 +4942,11 @@
  "model_format":"mlx",
  "model_size_in_billions":2,
  "quantizations":[
+ "4bit",
  "8bit"
  ],
  "model_hub": "modelscope",
- "model_id":"okwinds/Qwen2-VL-2B-Instruct-MLX-8bit",
+ "model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}",
  "model_revision":"master"
  },
  {
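
The change above swaps a fixed `okwinds` repository for an `mlx-community` id carrying a `{quantization}` placeholder, which lets one spec cover both listed quantizations. A sketch of how such a placeholder presumably expands (plain `str.format`; the actual resolution lives in xinference's model-family code):

```python
# Illustrative only: expand the "{quantization}" placeholder from the spec
# above into concrete ModelScope repository ids.
spec_model_id = "mlx-community/Qwen2-VL-2B-Instruct-{quantization}"

for quantization in ("4bit", "8bit"):  # quantizations listed in the spec
    print(spec_model_id.format(quantization=quantization))
# mlx-community/Qwen2-VL-2B-Instruct-4bit
# mlx-community/Qwen2-VL-2B-Instruct-8bit
```
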
@@ -4825,6 +4999,97 @@
  "<|endoftext|>"
  ]
  },
+ {
+ "version":1,
+ "context_length":128000,
+ "model_name":"qwen2.5-vl-instruct",
+ "model_lang":[
+ "en",
+ "zh"
+ ],
+ "model_ability":[
+ "chat",
+ "vision"
+ ],
+ "model_description":"Qwen2.5-VL: Qwen2.5-VL is the latest version of the vision language models in the Qwen model familities.",
+ "model_specs":[
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":3,
+ "quantizations":[
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"qwen/Qwen2.5-VL-3B-Instruct"
+ },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"qwen/Qwen2.5-VL-7B-Instruct"
+ },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":72,
+ "quantizations":[
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"qwen/Qwen2.5-VL-72B-Instruct"
+ },
+ {
+ "model_format":"mlx",
+ "model_size_in_billions":3,
+ "quantizations":[
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"mlx-community/Qwen2.5-VL-3B-Instruct-{quantization}"
+ },
+ {
+ "model_format":"mlx",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
+ },
+ {
+ "model_format":"mlx",
+ "model_size_in_billions":72,
+ "quantizations":[
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"mlx-community/Qwen2.5-VL-72B-Instruct-{quantization}"
+ }
+ ],
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+ "stop_token_ids": [
+ 151645,
+ 151643
+ ],
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
+ },
  {
  "version": 1,
  "context_length": 32768,
5069
5334
  "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
5070
5335
  "model_specs": [
5071
5336
  {
5072
- "model_format": "pytorch",
5073
- "model_size_in_billions": 236,
5337
+ "model_format": "pytorch",
5338
+ "model_size_in_billions": 236,
5339
+ "quantizations": [
5340
+ "4-bit",
5341
+ "8-bit",
5342
+ "none"
5343
+ ],
5344
+ "model_id": "deepseek-ai/DeepSeek-V2.5",
5345
+ "model_hub": "modelscope",
5346
+ "model_revision": "master"
5347
+ }
5348
+ ],
5349
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
5350
+ "stop_token_ids": [
5351
+ 100001
5352
+ ],
5353
+ "stop": [
5354
+ "<|end▁of▁sentence|>"
5355
+ ]
5356
+ },
5357
+ {
5358
+ "version": 1,
5359
+ "context_length": 163840,
5360
+ "model_name": "deepseek-v3",
5361
+ "model_lang": [
5362
+ "en",
5363
+ "zh"
5364
+ ],
5365
+ "model_ability": [
5366
+ "chat",
5367
+ "reasoning"
5368
+ ],
5369
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
5370
+ "model_specs": [
5371
+ {
5372
+ "model_format": "pytorch",
5373
+ "model_size_in_billions": 671,
5374
+ "quantizations": [
5375
+ "4-bit",
5376
+ "8-bit",
5377
+ "none"
5378
+ ],
5379
+ "model_id": "deepseek-ai/DeepSeek-V3",
5380
+ "model_hub": "modelscope",
5381
+ "model_revision": "master"
5382
+ },
5383
+ {
5384
+ "model_format": "awq",
5385
+ "model_size_in_billions": 671,
5386
+ "quantizations": [
5387
+ "Int4"
5388
+ ],
5389
+ "model_id": "cognitivecomputations/DeepSeek-V3-awq",
5390
+ "model_hub": "modelscope",
5391
+ "model_revision": "master"
5392
+ },
5393
+ {
5394
+ "model_format": "ggufv2",
5395
+ "model_size_in_billions": 671,
5396
+ "quantizations": [
5397
+ "Q2_K_L",
5398
+ "Q2_K_XS",
5399
+ "Q3_K_M",
5400
+ "Q4_K_M",
5401
+ "Q5_K_M",
5402
+ "Q6_K",
5403
+ "Q8_0"
5404
+ ],
5405
+ "model_id": "unsloth/DeepSeek-V3-GGUF",
5406
+ "model_hub": "modelscope",
5407
+ "model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
5408
+ "model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
5409
+ "quantization_parts": {
5410
+ "Q2_K_L": [
5411
+ "00001-of-00005",
5412
+ "00002-of-00005",
5413
+ "00003-of-00005",
5414
+ "00004-of-00005",
5415
+ "00005-of-00005"
5416
+ ],
5417
+ "Q2_K_XS": [
5418
+ "00001-of-00005",
5419
+ "00002-of-00005",
5420
+ "00003-of-00005",
5421
+ "00004-of-00005",
5422
+ "00005-of-00005"
5423
+ ],
5424
+ "Q3_K_M": [
5425
+ "00001-of-00007",
5426
+ "00002-of-00007",
5427
+ "00003-of-00007",
5428
+ "00004-of-00007",
5429
+ "00005-of-00007",
5430
+ "00006-of-00007",
5431
+ "00007-of-00007"
5432
+ ],
5433
+ "Q4_K_M": [
5434
+ "00001-of-00009",
5435
+ "00002-of-00009",
5436
+ "00003-of-00009",
5437
+ "00004-of-00009",
5438
+ "00005-of-00009",
5439
+ "00006-of-00009",
5440
+ "00007-of-00009",
5441
+ "00008-of-00009",
5442
+ "00009-of-00009"
5443
+ ],
5444
+ "Q5_K_M": [
5445
+ "00001-of-00010",
5446
+ "00002-of-00010",
5447
+ "00003-of-00010",
5448
+ "00004-of-00010",
5449
+ "00005-of-00010",
5450
+ "00006-of-00010",
5451
+ "00007-of-00010",
5452
+ "00008-of-00010",
5453
+ "00009-of-00010",
5454
+ "00010-of-00010"
5455
+ ],
5456
+ "Q6_K": [
5457
+ "00001-of-00012",
5458
+ "00002-of-00012",
5459
+ "00003-of-00012",
5460
+ "00004-of-00012",
5461
+ "00005-of-00012",
5462
+ "00006-of-00012",
5463
+ "00007-of-00012",
5464
+ "00008-of-00012",
5465
+ "00009-of-00012",
5466
+ "00010-of-00012",
5467
+ "00011-of-00012",
5468
+ "00012-of-00012"
5469
+ ],
5470
+ "Q8_0": [
5471
+ "00001-of-00016",
5472
+ "00002-of-00016",
5473
+ "00003-of-00016",
5474
+ "00004-of-00016",
5475
+ "00005-of-00016",
5476
+ "00006-of-00016",
5477
+ "00007-of-00016",
5478
+ "00008-of-00016",
5479
+ "00009-of-00016",
5480
+ "00010-of-00016",
5481
+ "00011-of-00016",
5482
+ "00012-of-00016",
5483
+ "00013-of-00016",
5484
+ "00014-of-00016",
5485
+ "00015-of-00016",
5486
+ "00016-of-00016"
5487
+ ]
5488
+ }
5489
+ },
5490
+ {
5491
+ "model_format": "mlx",
5492
+ "model_size_in_billions": 671,
5493
+ "quantizations": [
5494
+ "3bit",
5495
+ "4bit"
5496
+ ],
5497
+ "model_id": "mlx-community/DeepSeek-V3-{quantization}",
5498
+ "model_hub": "modelscope"
5499
+ }
5500
+ ],
5501
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
5502
+ "stop_token_ids": [
5503
+ 1
5504
+ ],
5505
+ "stop": [
5506
+ "<|end▁of▁sentence|>"
5507
+ ],
5508
+ "reasoning_start_tag": "<think>",
5509
+ "reasoning_end_tag": "</think>"
5510
+ },
5511
+ {
5512
+ "version": 1,
5513
+ "context_length": 163840,
5514
+ "model_name": "deepseek-r1",
5515
+ "model_lang": [
5516
+ "en",
5517
+ "zh"
5518
+ ],
5519
+ "model_ability": [
5520
+ "chat"
5521
+ ],
5522
+ "model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
5523
+ "model_specs": [
5524
+ {
5525
+ "model_format": "pytorch",
5526
+ "model_size_in_billions": 671,
5527
+ "quantizations": [
5528
+ "4-bit",
5529
+ "8-bit",
5530
+ "none"
5531
+ ],
5532
+ "model_id": "deepseek-ai/DeepSeek-R1",
5533
+ "model_hub": "modelscope",
5534
+ "model_revision": "master"
5535
+ },
5536
+ {
5537
+ "model_format": "awq",
5538
+ "model_size_in_billions": 671,
5539
+ "quantizations": [
5540
+ "Int4"
5541
+ ],
5542
+ "model_id": "cognitivecomputations/DeepSeek-R1-awq",
5543
+ "model_hub": "modelscope",
5544
+ "model_revision": "master"
5545
+ },
5546
+ {
5547
+ "model_format": "ggufv2",
5548
+ "model_size_in_billions": 671,
5549
+ "quantizations": [
5550
+ "UD-IQ1_S",
5551
+ "UD-IQ1_M",
5552
+ "UD-IQ2_XXS",
5553
+ "UD-Q2_K_XL",
5554
+ "Q2_K",
5555
+ "Q2_K_L",
5556
+ "Q2_K_XS",
5557
+ "Q3_K_M",
5558
+ "Q4_K_M",
5559
+ "Q5_K_M",
5560
+ "Q6_K",
5561
+ "Q8_0",
5562
+ "BF16"
5563
+ ],
5564
+ "model_id": "unsloth/DeepSeek-R1-GGUF",
5565
+ "model_hub": "modelscope",
5566
+ "model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
5567
+ "model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
5568
+ "quantization_parts": {
5569
+ "UD-IQ1_S": [
5570
+ "00001-of-00003",
5571
+ "00002-of-00003",
5572
+ "00003-of-00003"
5573
+ ],
5574
+ "UD-IQ1_M": [
5575
+ "00001-of-00004",
5576
+ "00002-of-00004",
5577
+ "00003-of-00004",
5578
+ "00004-of-00004"
5579
+ ],
5580
+ "UD-IQ2_XXS": [
5581
+ "00001-of-00004",
5582
+ "00002-of-00004",
5583
+ "00003-of-00004",
5584
+ "00004-of-00004"
5585
+ ],
5586
+ "UD-Q2_K_XL": [
5587
+ "00001-of-00005",
5588
+ "00002-of-00005",
5589
+ "00003-of-00005",
5590
+ "00004-of-00005",
5591
+ "00005-of-00005"
5592
+ ],
5593
+ "Q2_K": [
5594
+ "00001-of-00005",
5595
+ "00002-of-00005",
5596
+ "00003-of-00005",
5597
+ "00004-of-00005",
5598
+ "00005-of-00005"
5599
+ ],
5600
+ "Q2_K_L": [
5601
+ "00001-of-00005",
5602
+ "00002-of-00005",
5603
+ "00003-of-00005",
5604
+ "00004-of-00005",
5605
+ "00005-of-00005"
5606
+ ],
5607
+ "Q2_K_XS": [
5608
+ "00001-of-00005",
5609
+ "00002-of-00005",
5610
+ "00003-of-00005",
5611
+ "00004-of-00005",
5612
+ "00005-of-00005"
5613
+ ],
5614
+ "Q3_K_M": [
5615
+ "00001-of-00007",
5616
+ "00002-of-00007",
5617
+ "00003-of-00007",
5618
+ "00004-of-00007",
5619
+ "00005-of-00007",
5620
+ "00006-of-00007",
5621
+ "00007-of-00007"
5622
+ ],
5623
+ "Q4_K_M": [
5624
+ "00001-of-00009",
5625
+ "00002-of-00009",
5626
+ "00003-of-00009",
5627
+ "00004-of-00009",
5628
+ "00005-of-00009",
5629
+ "00006-of-00009",
5630
+ "00007-of-00009",
5631
+ "00008-of-00009",
5632
+ "00009-of-00009"
5633
+ ],
5634
+ "Q5_K_M": [
5635
+ "00001-of-00010",
5636
+ "00002-of-00010",
5637
+ "00003-of-00010",
5638
+ "00004-of-00010",
5639
+ "00005-of-00010",
5640
+ "00006-of-00010",
5641
+ "00007-of-00010",
5642
+ "00008-of-00010",
5643
+ "00009-of-00010",
5644
+ "00010-of-00010"
5645
+ ],
5646
+ "Q6_K": [
5647
+ "00001-of-00012",
5648
+ "00002-of-00012",
5649
+ "00003-of-00012",
5650
+ "00004-of-00012",
5651
+ "00005-of-00012",
5652
+ "00006-of-00012",
5653
+ "00007-of-00012",
5654
+ "00008-of-00012",
5655
+ "00009-of-00012",
5656
+ "00010-of-00012",
5657
+ "00011-of-00012",
5658
+ "00012-of-00012"
5659
+ ],
5660
+ "Q8_0": [
5661
+ "00001-of-00015",
5662
+ "00002-of-00015",
5663
+ "00003-of-00015",
5664
+ "00004-of-00015",
5665
+ "00005-of-00015",
5666
+ "00006-of-00015",
5667
+ "00007-of-00015",
5668
+ "00008-of-00015",
5669
+ "00009-of-00015",
5670
+ "00010-of-00015",
5671
+ "00011-of-00015",
5672
+ "00012-of-00015",
5673
+ "00013-of-00015",
5674
+ "00014-of-00015",
5675
+ "00015-of-00015"
5676
+ ],
5677
+ "BF16": [
5678
+ "00001-of-00030",
5679
+ "00002-of-00030",
5680
+ "00003-of-00030",
5681
+ "00004-of-00030",
5682
+ "00005-of-00030",
5683
+ "00006-of-00030",
5684
+ "00007-of-00030",
5685
+ "00008-of-00030",
5686
+ "00009-of-00030",
5687
+ "00010-of-00030",
5688
+ "00011-of-00030",
5689
+ "00012-of-00030",
5690
+ "00013-of-00030",
5691
+ "00014-of-00030",
5692
+ "00015-of-00030",
5693
+ "00016-of-00030",
5694
+ "00017-of-00030",
5695
+ "00018-of-00030",
5696
+ "00019-of-00030",
5697
+ "00020-of-00030",
5698
+ "00021-of-00030",
5699
+ "00022-of-00030",
5700
+ "00023-of-00030",
5701
+ "00024-of-00030",
5702
+ "00025-of-00030",
5703
+ "00026-of-00030",
5704
+ "00027-of-00030",
5705
+ "00028-of-00030",
5706
+ "00029-of-00030",
5707
+ "00030-of-00030"
5708
+ ]
5709
+ }
5710
+ },
5711
+ {
5712
+ "model_format": "mlx",
5713
+ "model_size_in_billions": 671,
5074
5714
  "quantizations": [
5075
- "4-bit",
5076
- "8-bit",
5077
- "none"
5715
+ "2bit",
5716
+ "3bit",
5717
+ "4bit"
5078
5718
  ],
5079
- "model_id": "deepseek-ai/DeepSeek-V2.5",
5080
- "model_hub": "modelscope",
5081
- "model_revision": "master"
5719
+ "model_id": "mlx-community/DeepSeek-R1-{quantization}",
5720
+ "model_hub": "modelscope"
5082
5721
  }
5083
5722
  ],
5084
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
5723
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
5085
5724
  "stop_token_ids": [
5086
- 100001
5725
+ 1
5087
5726
  ],
5088
5727
  "stop": [
5089
5728
  "<|end▁of▁sentence|>"
@@ -5558,7 +6197,7 @@
  "q8_0"
  ],
  "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF",
- "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf",
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
  "model_hub": "modelscope",
  "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
  "quantization_parts": {
@@ -6442,7 +7081,8 @@
  "zh"
  ],
  "model_ability": [
- "chat"
+ "chat",
+ "reasoning"
  ],
  "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
  "model_specs": [
@@ -6473,6 +7113,19 @@
  "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf",
  "model_hub": "modelscope"
  },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": "1_5",
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}",
+ "model_hub": "modelscope"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 7,
@@ -6613,13 +7266,137 @@
  "model_hub": "modelscope"
  }
  ],
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
  "stop_token_ids": [
  151643
  ],
  "stop": [
  "<|end▁of▁sentence|>"
- ]
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "deepseek-r1-distill-llama",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning"
+ ],
+ "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Llama-8B-{quantization}.gguf",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit",
+ "bf16"
+ ],
+ "model_id": "okwinds/DeepSeek-R1-Distill-Llama-8B-MLX-{quantization}",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "F16"
+ ],
+ "quantization_parts": {
+ "Q6_K": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "F16": [
+ "00001-of-00003",
+ "00002-of-00003",
+ "00003-of-00003"
+ ]
+ },
+ "model_id": "unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF",
+ "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+ "model_file_name_split_template": "DeepSeek-R1-Distill-Llama-70B-{quantization}/DeepSeek-R1-Distill-Llama-70B-{quantization}-{part}.gguf",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "3bit",
+ "4bit",
+ "6bit",
+ "8bit"
+ ],
+ "model_id": "okwinds/DeepSeek-R1-Distill-Llama-70B-MLX-{quantization}",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+ "stop_token_ids": [
+ 151643
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
  },
  {
  "version": 1,
@@ -6911,7 +7688,7 @@
  "<|endoftext|>"
  ]
  },
- {
+ {
  "version": 1,
  "context_length": 32768,
  "model_name": "marco-o1",
@@ -7009,5 +7786,85 @@
  "<|user|>",
  "<|observation|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internlm3-instruct",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "tools"
+ ],
+ "model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gptq-int4",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-awq",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0"
+ ],
+ "model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gguf",
+ "model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format":"mlx",
+ "model_size_in_billions":8,
+ "quantizations":[
+ "4bit"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"mlx-community/internlm3-8b-instruct-{quantization}"
+ }
+ ],
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "stop_token_ids": [
+ 2,
+ 128131
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
  }
  ]